diff --git a/decnet/canary/__init__.py b/decnet/canary/__init__.py new file mode 100644 index 00000000..8a250514 --- /dev/null +++ b/decnet/canary/__init__.py @@ -0,0 +1,37 @@ +"""Canary tokens — decoy artifacts planted in decky filesystems. + +Public surface is exported here so callers can ``from decnet.canary +import CanaryArtifact, get_generator, get_instrumenter`` without +knowing the submodule layout. Concrete generators / instrumenters +live under :mod:`decnet.canary.generators` and +:mod:`decnet.canary.instrumenters` respectively; the factory keeps +import-time cost down by deferring those imports until first use +(same pattern as :mod:`decnet.intel.factory`). +""" +from __future__ import annotations + +from decnet.canary.base import ( + CanaryArtifact, + CanaryContext, + CanaryGenerator, + CanaryInstrumenter, +) +from decnet.canary.factory import ( + KNOWN_GENERATORS, + KNOWN_INSTRUMENTERS, + get_generator, + get_instrumenter, + pick_instrumenter_for_mime, +) + +__all__ = [ + "CanaryArtifact", + "CanaryContext", + "CanaryGenerator", + "CanaryInstrumenter", + "KNOWN_GENERATORS", + "KNOWN_INSTRUMENTERS", + "get_generator", + "get_instrumenter", + "pick_instrumenter_for_mime", +] diff --git a/decnet/canary/base.py b/decnet/canary/base.py new file mode 100644 index 00000000..160dcd19 --- /dev/null +++ b/decnet/canary/base.py @@ -0,0 +1,145 @@ +"""Canary generator / instrumenter ABCs and the artifact dataclass. + +Two flavors of producer share the same return shape: + +* :class:`CanaryGenerator` synthesises a fake artifact from scratch + (e.g. a plausible ``~/.aws/credentials`` block, a ``.git/config`` + pointing at an attacker-bait remote URL). Operators don't supply + any input. + +* :class:`CanaryInstrumenter` mutates an operator-uploaded blob to + embed the callback (HTTP slug + DNS host). The original blob bytes + are passed in; the instrumenter returns the mutated version. 
@dataclass
class CanaryContext:
    """Inputs every generator/instrumenter needs to embed a working callback.

    ``callback_token`` is the unique slug. It appears verbatim in the
    HTTP callback URL and as the leftmost DNS label of the callback
    hostname, so a single slug resolves to a single
    :class:`CanaryToken` row regardless of which path the attacker
    tripped.

    NOTE(review): the two inline examples in the original text lost
    their placeholders (they read ``https:///c/`` and ``.canary.``).
    Presumably they were ``<http_base>/c/<callback_token>`` and a
    hostname whose first label is ``<callback_token>`` under the canary
    zone — confirm the exact shape against the callback worker.

    ``http_base`` and ``dns_zone`` come from the canary worker's
    public-facing config (``DECNET_CANARY_HTTP_BASE``,
    ``DECNET_CANARY_DNS_ZONE``). When DNS isn't deployed,
    ``dns_zone`` is empty and instrumenters that only have a DNS
    surface (e.g. an artifact whose only realistic embed point is a
    hostname) raise.
    """

    # Unique slug identifying the token in both HTTP and DNS callbacks.
    callback_token: str
    # Public HTTP origin, e.g. "https://canary.example.test" — no trailing slash.
    http_base: str
    # DNS zone, e.g. "canary.example.test"; "" disables DNS embeds.
    dns_zone: str = ""
    # "linux" | "windows" — drives default username and path style.
    persona: str = "linux"


@dataclass
class CanaryArtifact:
    """Bytes-and-placement bundle produced by a generator/instrumenter."""

    path: str
    """Absolute path inside the target container."""

    content: bytes
    """Final bytes that hit the decky filesystem.

    Always raw bytes — the planter base64-encodes for the wire so
    binary blobs (DOCX/PNG/PDF) survive ``docker exec sh -c`` safely.
    """

    mode: int = 0o600
    """Unix file mode. Defaults to ``0600`` because most realistic
    canary placements (``~/.aws/credentials``, ``.env``, ``id_rsa``)
    are operator-only. Honeydocs in user docs folders should pass
    ``0o644``.
    """

    mtime_offset: int = 0
    """Seconds relative to *now* for the planted file's mtime.

    Negative values backdate the file so it doesn't look like it
    appeared the moment the decky was deployed. ``-86400 * 90`` (90
    days ago) is a common choice for ``honeydoc`` artifacts; ``0``
    means "stamp it now," which is fine for ``aws_creds``-like files
    that would plausibly be touched recently.
    """

    # NOTE(review): ``instrumenter`` and ``generator`` are documented as
    # mutually exclusive, but nothing in this dataclass enforces that
    # (there is no ``__post_init__``) — presumably producers only ever
    # set one of them; confirm before relying on it.
    instrumenter: Optional[str] = None
    """Identifier of the instrumenter that produced this artifact (for
    upload-driven tokens). Mirrored into ``CanaryToken.instrumenter``.
    Mutually exclusive with :attr:`generator`.
    """

    generator: Optional[str] = None
    """Identifier of the generator that produced this artifact (for
    synthesised tokens). Mirrored into ``CanaryToken.generator``.
    Mutually exclusive with :attr:`instrumenter`.
    """

    # ``default_factory`` gives every artifact its own list instead of a
    # shared mutable default.
    notes: list[str] = field(default_factory=list)
    """Human-readable notes about the embedding (e.g. "DOCX: injected
    1×1 remote image at relsId rId99"). Surfaced in the API
    ``preview`` response so the operator sees what we did before
    planting. Never leaked to the attacker-facing surface.
    """


class CanaryGenerator(ABC):
    """Produces a fake artifact from scratch.

    Concrete subclasses set :attr:`name` and implement :meth:`generate`.
    """

    name: str  #: short tag — matches ``CanaryToken.generator``

    @abstractmethod
    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Synthesise the artifact.

        MUST NOT do I/O. MUST be deterministic for the same
        ``(callback_token, http_base, dns_zone, persona)`` so re-seeding
        from :attr:`CanaryToken.secret_seed` produces byte-identical
        output and the planter is naturally idempotent.

        Args:
            ctx: Callback coordinates and persona to embed.

        Returns:
            The artifact (path, bytes and placement metadata) to plant.
        """


class CanaryInstrumenter(ABC):
    """Mutates an operator-uploaded blob to embed a callback.

    Concrete subclasses set :attr:`name`, optionally
    :attr:`mime_prefixes`, and implement :meth:`instrument`.
    """

    name: str  #: short tag — matches ``CanaryToken.instrumenter``

    #: MIME prefixes this instrumenter handles, e.g.
    #: ``("application/pdf",)`` or ``("text/",)``. Defaults to empty.
    #: NOTE(review): the original comment says the factory dispatches on
    #: these by "sub-string match", but the dispatch function shipped in
    #: the same change (``pick_instrumenter_for_mime``) matches with
    #: ``str.startswith`` against its own private table — confirm
    #: whether anything actually reads this attribute.
    mime_prefixes: tuple[str, ...] = ()

    @abstractmethod
    def instrument(
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        """Return the mutated bytes with the callback embedded.

        MUST raise :class:`InstrumenterRejectedError` when the blob
        can't be safely mutated (corrupt zip, encrypted PDF, etc.) so
        the API can surface a 400 with the specific reason rather than
        silently shipping the original bytes.

        Args:
            blob: Original bytes of the operator-uploaded file.
            ctx: Callback coordinates and persona to embed.
            target_path: Keyword-only placement path for the artifact.

        Returns:
            The artifact carrying the mutated bytes.
        """


class InstrumenterRejectedError(ValueError):
    """Raised when an instrumenter can't safely mutate the input.

    Subclasses :class:`ValueError`, so handlers that catch ``ValueError``
    also catch this.
    """
def get_instrumenter(name: str) -> CanaryInstrumenter:
    """Instantiate the instrumenter whose registry tag equals ``name``.

    Every implementation module is imported only inside the branch that
    selects it, so requesting one instrumenter never imports the
    modules backing the others.

    Raises:
        ValueError: ``name`` is not one of the known instrumenter tags.
    """
    # Select the implementation class first; instantiate once at the end.
    if name == "docx":
        from decnet.canary.instrumenters.docx import DocxInstrumenter as impl
    elif name == "xlsx":
        from decnet.canary.instrumenters.xlsx import XlsxInstrumenter as impl
    elif name == "pdf":
        from decnet.canary.instrumenters.pdf import PdfInstrumenter as impl
    elif name == "html":
        from decnet.canary.instrumenters.html import HtmlInstrumenter as impl
    elif name == "image":
        from decnet.canary.instrumenters.image import ImageInstrumenter as impl
    elif name == "plain":
        from decnet.canary.instrumenters.plain import PlainInstrumenter as impl
    elif name == "passthrough":
        from decnet.canary.instrumenters.passthrough import (
            PassthroughInstrumenter as impl,
        )
    else:
        raise ValueError(
            f"Unknown canary instrumenter: {name!r}. Known: {KNOWN_INSTRUMENTERS}"
        )
    return impl()
# MIME → instrumenter dispatch. Order matters: we walk the table
# top-to-bottom and the first prefix match wins, so put the more
# specific (DOCX/XLSX) before the generic (zip/octet-stream).
_MIME_DISPATCH: tuple[tuple[str, str], ...] = (
    # Office Open XML — DOCX/XLSX share a zip structure but expose
    # different inner trees, so dispatch by MIME alias rather than
    # zip-poking.
    ("application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx"),
    ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "xlsx"),
    ("application/pdf", "pdf"),
    ("text/html", "html"),
    ("application/xhtml+xml", "html"),
    ("image/png", "image"),
    ("image/jpeg", "image"),
    ("image/gif", "image"),
    # Plaintext catch-alls — config files, .env, .ini, .yaml, .json,
    # source code. All handled by the same regex-substitution pass.
    ("text/", "plain"),
    ("application/json", "plain"),
    ("application/x-yaml", "plain"),
    ("application/yaml", "plain"),
    ("application/toml", "plain"),
)


def pick_instrumenter_for_mime(content_type: str) -> str:
    """Return the instrumenter name registered for a sniffed MIME.

    Matching is case-insensitive, ignores surrounding whitespace, and
    is prefix-based, so trailing parameters such as
    ``; charset=utf-8`` do not prevent a match.

    Falls back to ``"passthrough"`` for anything we don't have an
    embedder for (binary blobs we can't mutate safely — random
    container images, archives, executables). ``passthrough`` only
    supports DNS-callback tokens (the slug ends up in the filename or
    an accompanying README), so the API surfaces that constraint to
    the operator before they pick a kind.

    Args:
        content_type: MIME type string; empty, blank or ``None`` is
            treated as unknown.

    Returns:
        The instrumenter tag of the first matching table row, or
        ``"passthrough"`` when nothing matches.
    """
    # Header-derived values often carry stray whitespace; without the
    # strip, " text/html" would miss every prefix and silently degrade
    # to passthrough.
    normalized = content_type.strip().lower() if content_type else ""
    if not normalized:
        return "passthrough"
    for prefix, name in _MIME_DISPATCH:
        if normalized.startswith(prefix):
            return name
    return "passthrough"
DEFAULT_LINUX_USER = "admin"
DEFAULT_WINDOWS_USER = "Administrator"

# Canonical placements for the synthesizer-driven baseline tokens.
# Operators can override per-token via the API, but these are the
# defaults the deploy-time seed uses.
_LINUX_DEFAULTS: dict[str, str] = {
    "git_config": "/home/{user}/.git/config",
    "env_file": "/home/{user}/.env",
    "ssh_key": "/home/{user}/.ssh/id_rsa",
    "aws_creds": "/home/{user}/.aws/credentials",
    "honeydoc": "/home/{user}/Documents/quarterly_report.docx",
}

_WINDOWS_DEFAULTS: dict[str, str] = {
    "git_config": "/home/{user}/AppData/Local/Programs/Git/etc/gitconfig",
    "env_file": "/home/{user}/Desktop/prod.env",
    "ssh_key": "/home/{user}/.ssh/id_rsa",  # OpenSSH on Windows uses the same path
    "aws_creds": "/home/{user}/.aws/credentials",
    "honeydoc": "/home/{user}/Documents/quarterly_report.docx",
}


def default_user(persona: str) -> str:
    """Return the conventional unprivileged username for a persona.

    Only the exact string ``"windows"`` selects the Windows user; every
    other value (including unknown personas) gets the Linux user.
    """
    return DEFAULT_WINDOWS_USER if persona == "windows" else DEFAULT_LINUX_USER


def default_path_for(generator: str, persona: str = "linux") -> str:
    """Resolve the default placement path for a synthesized token.

    Returns an absolute container path with ``{user}`` already
    expanded. Falls back to a sane Linux default for unknown
    personas — better to plant *something* than fail the deploy hook.

    Args:
        generator: Generator tag used as the key into the default table.
        persona: ``"windows"`` selects the Windows-shaped table; any
            other value selects the Linux one.

    Returns:
        The expanded default path, or ``/tmp/<generator>.canary`` when
        the generator has no table entry.
    """
    table = _WINDOWS_DEFAULTS if persona == "windows" else _LINUX_DEFAULTS
    template = table.get(generator)
    if not template:
        # Unknown generator — fall back to a generic /tmp drop so the
        # planter still has somewhere to write. The API rejects
        # unknown generators upstream, so this branch is defensive.
        return f"/tmp/{generator}.canary"  # nosec B108 — placement inside attacker-facing decoy container, not host /tmp
    return template.format(user=default_user(persona))


def normalize_placement(path: str) -> str:
    """Validate an operator-supplied placement path.

    Rejects empty or relative paths, NUL bytes, CR/LF characters and
    any ``..`` path segment. The path is returned unchanged when valid.

    NOTE: no other shell metacharacters (spaces, quotes, ``$``, ...) are
    rejected here, so callers that splice the path into a shell command
    must still quote it.

    Args:
        path: Candidate absolute path inside the target container.

    Returns:
        ``path`` itself, unmodified.

    Raises:
        ValueError: The path fails one of the checks above; the message
            names the violated rule so the API can return a clear 400.
    """
    if not path or not path.startswith("/"):
        raise ValueError("placement_path must be absolute (start with '/')")
    if "\x00" in path:
        raise ValueError("placement_path may not contain NUL")
    if "\n" in path or "\r" in path:
        raise ValueError("placement_path may not contain newlines")
    # Compare whole segments: a substring test for "../" would also
    # reject legitimate names that merely end in two dots
    # (e.g. "/srv/archive../notes.txt").
    if ".." in path.split("/"):
        raise ValueError("placement_path may not contain '..' segments")
    return path
# Characters permitted in a blob digest. Anything else (notably "/" and
# ".") could steer the derived path outside the blob root.
_HEX_DIGITS = frozenset("0123456789abcdefABCDEF")


def blob_dir() -> Path:
    """Return the on-disk root for canary blobs.

    Honors ``DECNET_CANARY_BLOB_DIR`` so tests can point at a tmp
    path. The environment is read on every call, and this function
    does not create the directory (``write_blob`` does, lazily).
    """
    raw = os.environ.get("DECNET_CANARY_BLOB_DIR", "/var/lib/decnet/canary/blobs")
    return Path(raw)


def _path_for(sha256: str) -> Path:
    """Map a hex digest to its location under :func:`blob_dir`.

    Raises:
        ValueError: The digest is shorter than 4 characters or contains
            anything other than hexadecimal digits.
    """
    # Two-level fan-out (``ab/cd/abcd...``) keeps any one directory
    # from accumulating thousands of entries on busy fleets. Same
    # shape as Git's loose-object store.
    if len(sha256) < 4:
        raise ValueError("sha256 must be at least 4 chars")
    # A length check alone lets values such as "../../etc/passwd"
    # through; joined onto the root they resolve outside the blob
    # directory, turning read/unlink into arbitrary-file primitives.
    if not _HEX_DIGITS.issuperset(sha256):
        raise ValueError("sha256 must contain only hexadecimal digits")
    root = blob_dir()
    return root / sha256[:2] / sha256[2:4] / sha256


def write_blob(content: bytes) -> Tuple[str, Path, int]:
    """Persist ``content`` under its sha256 path.

    Idempotent: if the target file already exists, nothing is
    rewritten (the path is content-addressed, so an existing file is
    taken to hold the same bytes).

    Args:
        content: Raw bytes to store.

    Returns:
        ``(sha256, path, size_bytes)`` for the stored blob.
    """
    sha = hashlib.sha256(content).hexdigest()
    target = _path_for(sha)
    target.parent.mkdir(parents=True, exist_ok=True)
    if not target.exists():
        # Write to a temp sibling and rename so readers never observe a
        # half-written blob. The temp name is unique per call: with a
        # shared "<sha>.part" name, two concurrent uploads of the same
        # bytes would truncate each other's temp file and the slower
        # one's rename would fail with FileNotFoundError.
        tmp = target.with_name(f"{target.name}.{os.urandom(8).hex()}.part")
        try:
            tmp.write_bytes(content)
            os.replace(tmp, target)
        finally:
            # After a successful rename the temp name is already gone;
            # after a failure this removes the partial file.
            try:
                tmp.unlink()
            except FileNotFoundError:
                pass
    return sha, target, len(content)


def read_blob(sha256: str) -> bytes:
    """Read the bytes for a stored blob.

    Raises :class:`FileNotFoundError` when the on-disk file was unlinked
    out of band (operator pruned ``/var/lib/decnet`` by hand) — the
    caller (instrumenter dispatch) surfaces it as a 410-ish error so
    the operator can re-upload. Raises :class:`ValueError` when
    ``sha256`` is not a usable hex digest.
    """
    return _path_for(sha256).read_bytes()


def unlink_blob(sha256: str) -> bool:
    """Delete the on-disk bytes for ``sha256``.

    Returns True if a file was removed, False if it was already gone.
    The DB row deletion happens in
    :meth:`SQLModelRepository.delete_canary_blob`; this function is
    a best-effort companion called *after* the DB delete commits so
    a crash between them leaves a recoverable orphan, never a
    dangling DB reference.

    Raises:
        ValueError: ``sha256`` is not a usable hex digest.
    """
    target = _path_for(sha256)
    try:
        target.unlink()
    except FileNotFoundError:
        return False
    return True
+""" +from __future__ import annotations + +import pytest + +from decnet.canary.factory import ( + KNOWN_GENERATORS, + KNOWN_INSTRUMENTERS, + pick_instrumenter_for_mime, +) + + +@pytest.mark.parametrize( + "mime, expected", + [ + ("application/pdf", "pdf"), + ("application/PDF", "pdf"), # case-insensitive + ("application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx"), + ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "xlsx"), + ("text/html", "html"), + ("application/xhtml+xml", "html"), + ("text/plain", "plain"), + ("text/x-yaml", "plain"), + ("application/json", "plain"), + ("application/yaml", "plain"), + ("application/toml", "plain"), + ("image/png", "image"), + ("image/jpeg", "image"), + ("image/gif", "image"), + ], +) +def test_mime_dispatch_known(mime: str, expected: str) -> None: + assert pick_instrumenter_for_mime(mime) == expected + + +@pytest.mark.parametrize( + "mime", + [ + "", + "application/octet-stream", + "application/x-tar", + "application/zip", # bare zip — DOCX/XLSX dispatch by alias, not raw zip + "video/mp4", + "audio/mpeg", + ], +) +def test_mime_dispatch_falls_back_to_passthrough(mime: str) -> None: + assert pick_instrumenter_for_mime(mime) == "passthrough" + + +def test_known_lists_are_stable() -> None: + # If anyone adds/removes from the dispatch tables, the test + # surfaces it. Keeps the schema-of-record in one place. 
+ assert KNOWN_GENERATORS == ( + "git_config", "env_file", "ssh_key", "aws_creds", "honeydoc", + ) + assert KNOWN_INSTRUMENTERS == ( + "docx", "xlsx", "pdf", "html", "image", "plain", "passthrough", + ) + + +def test_unknown_generator_raises() -> None: + from decnet.canary.factory import get_generator + with pytest.raises(ValueError, match="Unknown canary generator"): + get_generator("bogus") + + +def test_unknown_instrumenter_raises() -> None: + from decnet.canary.factory import get_instrumenter + with pytest.raises(ValueError, match="Unknown canary instrumenter"): + get_instrumenter("bogus") + + +def test_base_artifact_dataclass_defaults() -> None: + from decnet.canary import CanaryArtifact + a = CanaryArtifact(path="/x", content=b"y") + assert a.mode == 0o600 + assert a.mtime_offset == 0 + assert a.notes == [] + assert a.generator is None and a.instrumenter is None diff --git a/tests/canary/test_models.py b/tests/canary/test_models.py new file mode 100644 index 00000000..3426b611 --- /dev/null +++ b/tests/canary/test_models.py @@ -0,0 +1,85 @@ +"""Smoke coverage for the Pydantic request/response shapes + helpers. + +The tables themselves are exercised end-to-end in +:mod:`tests.canary.test_repository`; this module only covers the +helpers and request validation that don't go through the DB — +``CanaryTrigger.headers()`` JSON decoding, the +``CanaryTokenCreateRequest`` body shape, and the dump-roundtrip on +the response models. 
+""" +from __future__ import annotations + +import pytest + +from decnet.web.db.models import ( + CanaryBlobResponse, + CanaryTokenCreateRequest, + CanaryTokenResponse, + CanaryTrigger, + CanaryTriggerResponse, +) + + +def test_create_request_minimal() -> None: + r = CanaryTokenCreateRequest( + decky_name="web1", + kind="http", + placement_path="/home/admin/.env", + generator="env_file", + ) + assert r.blob_uuid is None + assert r.persona_path_hint is None + + +def test_create_request_kind_is_constrained() -> None: + with pytest.raises(ValueError): + CanaryTokenCreateRequest( + decky_name="web1", kind="bogus", # type: ignore[arg-type] + placement_path="/x", generator="aws_creds", + ) + + +def test_trigger_headers_decode_valid_json() -> None: + t = CanaryTrigger( + token_uuid="t", + src_ip="1.2.3.4", + raw_headers='{"user-agent":"curl"}', + ) + assert t.headers() == {"user-agent": "curl"} + + +@pytest.mark.parametrize("raw", ["", "not json", "[1,2,3]", "null"]) +def test_trigger_headers_falls_back_to_empty(raw: str) -> None: + t = CanaryTrigger(token_uuid="t", src_ip="1.2.3.4", raw_headers=raw) + assert t.headers() == {} + + +def test_response_models_round_trip() -> None: + # Canonical shapes — proves the field set + types match what the + # router will hand back. Strings everywhere because the DB layer + # uses str UUIDs (project convention). 
+ blob = CanaryBlobResponse( + uuid="b1", sha256="0" * 64, filename="x.docx", + content_type="application/octet-stream", size_bytes=1, + uploaded_by="u1", uploaded_at="2026-04-27T00:00:00Z", # type: ignore[arg-type] + token_count=2, + ) + assert blob.token_count == 2 + + tok = CanaryTokenResponse( + uuid="t1", kind="http", decky_name="web1", + blob_uuid=None, instrumenter=None, generator="aws_creds", + placement_path="/a", callback_token="s", + placed_at="2026-04-27T00:00:00Z", # type: ignore[arg-type] + last_triggered_at=None, trigger_count=0, + created_by="u1", state="planted", last_error=None, + ) + assert tok.kind == "http" + + trig = CanaryTriggerResponse( + uuid="x", token_uuid="t1", + occurred_at="2026-04-27T00:00:00Z", # type: ignore[arg-type] + src_ip="1.2.3.4", user_agent=None, request_path=None, + dns_qname=None, headers={}, attacker_id=None, + ) + assert trig.src_ip == "1.2.3.4" diff --git a/tests/canary/test_paths.py b/tests/canary/test_paths.py new file mode 100644 index 00000000..c633d4f5 --- /dev/null +++ b/tests/canary/test_paths.py @@ -0,0 +1,66 @@ +"""Coverage for the persona-aware path resolver + placement validator.""" +from __future__ import annotations + +import pytest + +from decnet.canary.paths import ( + DEFAULT_LINUX_USER, + DEFAULT_WINDOWS_USER, + default_path_for, + default_user, + normalize_placement, +) + + +def test_default_user_dispatch() -> None: + assert default_user("linux") == DEFAULT_LINUX_USER + assert default_user("windows") == DEFAULT_WINDOWS_USER + # Unknown personas fall through to Linux — better to plant than fail. 
+ assert default_user("aix") == DEFAULT_LINUX_USER + + +@pytest.mark.parametrize( + "generator, persona, expected_substr", + [ + ("aws_creds", "linux", "/home/admin/.aws/credentials"), + ("aws_creds", "windows", "/home/Administrator/.aws/credentials"), + ("env_file", "linux", "/home/admin/.env"), + ("env_file", "windows", "/home/Administrator/Desktop/prod.env"), + ("git_config", "linux", "/home/admin/.git/config"), + ("ssh_key", "linux", "/home/admin/.ssh/id_rsa"), + ("honeydoc", "linux", "/home/admin/Documents/quarterly_report.docx"), + ], +) +def test_default_path_for_known_generators( + generator: str, persona: str, expected_substr: str, +) -> None: + assert default_path_for(generator, persona) == expected_substr + + +def test_default_path_for_unknown_generator_falls_through() -> None: + # Unknown generator — defensive /tmp drop. The API rejects unknowns + # upstream, but the resolver shouldn't crash if one slips through. + assert default_path_for("bogus") == "/tmp/bogus.canary" + + +def test_normalize_placement_accepts_clean_paths() -> None: + assert normalize_placement("/home/admin/.env") == "/home/admin/.env" + assert normalize_placement("/var/lib/x") == "/var/lib/x" + + +@pytest.mark.parametrize( + "bad", + [ + "", + "relative/path", + "./still-relative", + "/path/with\x00nul", + "/path/with\nnewline", + "/path/with\rcr", + "/path/../escape", + "/trailing/..", + ], +) +def test_normalize_placement_rejects_bad(bad: str) -> None: + with pytest.raises(ValueError): + normalize_placement(bad) diff --git a/tests/canary/test_repository.py b/tests/canary/test_repository.py new file mode 100644 index 00000000..3cabf1ee --- /dev/null +++ b/tests/canary/test_repository.py @@ -0,0 +1,179 @@ +"""Repository CRUD coverage for canary blobs / tokens / triggers. + +Same harness as the rest of :mod:`tests.db` — spin up a SQLite-backed +:class:`SQLiteRepository` against a tempfile, exercise the public +methods, assert observable state. 
+ +We deliberately don't go through the API; that gets its own test +module once the router lands. This file proves the repository layer +in isolation: dedup, refcount-aware delete, slug lookup, atomic +trigger record + counter bump, attribution. +""" +from __future__ import annotations + +import hashlib +from typing import AsyncIterator + +import pytest +import pytest_asyncio + +from decnet.web.db.sqlite.repository import SQLiteRepository +import decnet.web.db.models # noqa: F401 — registers tables on import + + +@pytest_asyncio.fixture +async def repo(tmp_path) -> AsyncIterator[SQLiteRepository]: + r = SQLiteRepository(str(tmp_path / "canary.db")) + await r.initialize() + yield r + + +async def _make_blob(repo: SQLiteRepository, content: bytes, *, by: str = "u1") -> dict: + return await repo.upsert_canary_blob({ + "sha256": hashlib.sha256(content).hexdigest(), + "filename": "report.docx", + "content_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "size_bytes": len(content), + "uploaded_by": by, + }) + + +@pytest.mark.asyncio +async def test_upsert_blob_dedupes_by_sha256(repo: SQLiteRepository) -> None: + a = await _make_blob(repo, b"same bytes", by="u1") + b = await _make_blob(repo, b"same bytes", by="u2") + assert a["uuid"] == b["uuid"], "second upload must return the canonical row" + # Different bytes → different blob. 
+ c = await _make_blob(repo, b"different bytes", by="u1") + assert c["uuid"] != a["uuid"] + + +@pytest.mark.asyncio +async def test_upsert_blob_requires_sha256(repo: SQLiteRepository) -> None: + with pytest.raises(ValueError): + await repo.upsert_canary_blob({"filename": "x", "content_type": "x", "size_bytes": 0, "uploaded_by": "u"}) + + +@pytest.mark.asyncio +async def test_get_blob_by_sha256(repo: SQLiteRepository) -> None: + blob = await _make_blob(repo, b"x") + found = await repo.get_canary_blob_by_sha256(blob["sha256"]) + assert found is not None and found["uuid"] == blob["uuid"] + assert await repo.get_canary_blob_by_sha256("0" * 64) is None + + +@pytest.mark.asyncio +async def test_list_blobs_carries_token_count(repo: SQLiteRepository) -> None: + blob = await _make_blob(repo, b"x") + listed = await repo.list_canary_blobs() + assert len(listed) == 1 and listed[0]["token_count"] == 0 + await repo.create_canary_token({ + "kind": "http", "decky_name": "web1", "blob_uuid": blob["uuid"], + "instrumenter": "docx", "placement_path": "/tmp/x.docx", + "callback_token": "slug-1", "secret_seed": "s", "created_by": "u1", + }) + listed = await repo.list_canary_blobs() + assert listed[0]["token_count"] == 1 + + +@pytest.mark.asyncio +async def test_delete_blob_refuses_while_referenced(repo: SQLiteRepository) -> None: + blob = await _make_blob(repo, b"x") + await repo.create_canary_token({ + "kind": "http", "decky_name": "web1", "blob_uuid": blob["uuid"], + "instrumenter": "docx", "placement_path": "/tmp/x.docx", + "callback_token": "slug-r", "secret_seed": "s", "created_by": "u1", + }) + assert await repo.delete_canary_blob(blob["uuid"]) is False + # Even after revoke, the row still references the blob — operator + # must explicitly clean tokens before they can prune the blob. 
+ tok = await repo.get_canary_token_by_slug("slug-r") + await repo.update_canary_token_state(tok["uuid"], "revoked") + assert await repo.delete_canary_blob(blob["uuid"]) is False + + +@pytest.mark.asyncio +async def test_delete_blob_returns_false_for_missing(repo: SQLiteRepository) -> None: + assert await repo.delete_canary_blob("00000000-0000-0000-0000-000000000000") is False + + +@pytest.mark.asyncio +async def test_token_slug_lookup(repo: SQLiteRepository) -> None: + await repo.create_canary_token({ + "kind": "http", "decky_name": "web1", "generator": "aws_creds", + "placement_path": "/home/admin/.aws/credentials", + "callback_token": "slug-aws", "secret_seed": "s", "created_by": "u1", + }) + found = await repo.get_canary_token_by_slug("slug-aws") + assert found is not None and found["decky_name"] == "web1" + assert await repo.get_canary_token_by_slug("nonexistent") is None + + +@pytest.mark.asyncio +async def test_list_tokens_filters(repo: SQLiteRepository) -> None: + await repo.create_canary_token({ + "kind": "http", "decky_name": "web1", "generator": "aws_creds", + "placement_path": "/a", "callback_token": "s1", + "secret_seed": "s", "created_by": "u1", + }) + await repo.create_canary_token({ + "kind": "dns", "decky_name": "web2", "generator": "aws_creds", + "placement_path": "/b", "callback_token": "s2", + "secret_seed": "s", "created_by": "u1", + }) + assert len(await repo.list_canary_tokens()) == 2 + assert len(await repo.list_canary_tokens(decky_name="web1")) == 1 + assert len(await repo.list_canary_tokens(kind="dns")) == 1 + assert len(await repo.list_canary_tokens(state="revoked")) == 0 + + +@pytest.mark.asyncio +async def test_record_trigger_bumps_counters_atomically(repo: SQLiteRepository) -> None: + await repo.create_canary_token({ + "kind": "http", "decky_name": "web1", "generator": "aws_creds", + "placement_path": "/a", "callback_token": "slug-c", + "secret_seed": "s", "created_by": "u1", + }) + tok = await repo.get_canary_token_by_slug("slug-c") + 
assert tok["trigger_count"] == 0 and tok["last_triggered_at"] is None + trig_id = await repo.record_canary_trigger({ + "token_uuid": tok["uuid"], "src_ip": "1.2.3.4", + "request_path": "/c/slug-c", "user_agent": "curl/8.0", + "raw_headers": {"user-agent": "curl/8.0"}, + }) + assert trig_id + tok2 = await repo.get_canary_token_by_slug("slug-c") + assert tok2["trigger_count"] == 1 + assert tok2["last_triggered_at"] is not None + # raw_headers stored as JSON text and decodes via the model helper. + triggers = await repo.list_canary_triggers(tok["uuid"]) + assert len(triggers) == 1 + assert triggers[0]["src_ip"] == "1.2.3.4" + + +@pytest.mark.asyncio +async def test_attribute_trigger_sets_attacker(repo: SQLiteRepository) -> None: + await repo.create_canary_token({ + "kind": "http", "decky_name": "web1", "generator": "aws_creds", + "placement_path": "/a", "callback_token": "slug-at", + "secret_seed": "s", "created_by": "u1", + }) + tok = await repo.get_canary_token_by_slug("slug-at") + trig_id = await repo.record_canary_trigger({ + "token_uuid": tok["uuid"], "src_ip": "9.9.9.9", + }) + assert await repo.attribute_canary_trigger(trig_id, "attacker-uuid-123") is True + assert await repo.attribute_canary_trigger("missing-trig", "x") is False + triggers = await repo.list_canary_triggers(tok["uuid"]) + assert triggers[0]["attacker_id"] == "attacker-uuid-123" + + +@pytest.mark.asyncio +async def test_get_token_returns_none_for_missing(repo: SQLiteRepository) -> None: + assert await repo.get_canary_token("00000000-0000-0000-0000-000000000000") is None + assert await repo.get_canary_blob("00000000-0000-0000-0000-000000000000") is None + + +@pytest.mark.asyncio +async def test_update_state_returns_false_for_missing(repo: SQLiteRepository) -> None: + assert await repo.update_canary_token_state("missing", "revoked") is False diff --git a/tests/canary/test_storage.py b/tests/canary/test_storage.py new file mode 100644 index 00000000..5fd748ab --- /dev/null +++ 
# --- tests/canary/test_storage.py ---


def test_write_blob_is_idempotent(tmp_path, monkeypatch) -> None:
    """Writing the same bytes twice yields the same digest, path and size.

    Also checks the on-disk layout: the file sits two directories below the
    blob dir, named after the first and second pairs of hex digits.
    """
    payload = b"hello canary"
    monkeypatch.setenv("DECNET_CANARY_BLOB_DIR", str(tmp_path))

    first_digest, first_path, first_size = storage.write_blob(payload)
    second_digest, second_path, second_size = storage.write_blob(payload)

    assert first_digest == second_digest == hashlib.sha256(payload).hexdigest()
    assert first_path == second_path
    assert first_size == second_size == len(payload)

    # Two-level fan-out: <blob dir>/ab/cd/<file>
    inner_dir = first_path.parent
    outer_dir = inner_dir.parent
    assert outer_dir.parent == tmp_path
    assert inner_dir.name == first_digest[2:4]
    assert outer_dir.name == first_digest[:2]


def test_read_blob_returns_bytes(tmp_path, monkeypatch) -> None:
    """``read_blob`` returns the exact bytes given to ``write_blob``."""
    monkeypatch.setenv("DECNET_CANARY_BLOB_DIR", str(tmp_path))
    digest, _path, _size = storage.write_blob(b"some payload")
    stored = storage.read_blob(digest)
    assert stored == b"some payload"


def test_unlink_blob_returns_false_for_missing(tmp_path, monkeypatch) -> None:
    """Unlinking a digest that was never written returns ``False``."""
    monkeypatch.setenv("DECNET_CANARY_BLOB_DIR", str(tmp_path))
    absent_digest = "0" * 64
    removed = storage.unlink_blob(absent_digest)
    assert removed is False


def test_unlink_blob_removes_file(tmp_path, monkeypatch) -> None:
    """``unlink_blob`` deletes the file once, then reports ``False``."""
    monkeypatch.setenv("DECNET_CANARY_BLOB_DIR", str(tmp_path))
    digest, blob_path, _size = storage.write_blob(b"to be removed")
    assert blob_path.exists()

    first_attempt = storage.unlink_blob(digest)
    assert first_attempt is True
    assert not blob_path.exists()

    # A repeated unlink must report False instead of raising.
    second_attempt = storage.unlink_blob(digest)
    assert second_attempt is False


def test_blob_dir_honors_env(monkeypatch, tmp_path) -> None:
    """``blob_dir`` returns the path set in ``DECNET_CANARY_BLOB_DIR``."""
    alt_dir = tmp_path / "alt"
    monkeypatch.setenv("DECNET_CANARY_BLOB_DIR", str(alt_dir))
    assert storage.blob_dir() == alt_dir


def test_short_sha_rejected() -> None:
    """``_path_for`` raises ``ValueError`` for the three-character input ``"abc"``."""
    import pytest

    with pytest.raises(ValueError):
        storage._path_for("abc")


# --- tests/canary/test_topics.py ---


def test_canary_constants_are_distinct() -> None:
    """The canary topic constants have the expected values and do not collide."""
    expected_values = {
        "CANARY": "canary",
        "CANARY_PLACED": "placed",
        "CANARY_TRIGGERED": "triggered",
        "CANARY_REVOKED": "revoked",
    }
    for attr_name, value in expected_values.items():
        assert getattr(topics, attr_name) == value

    leaf_events = {
        topics.CANARY_PLACED,
        topics.CANARY_TRIGGERED,
        topics.CANARY_REVOKED,
    }
    assert len(leaf_events) == 3


def test_canary_builder_round_trip() -> None:
    """``topics.canary`` joins prefix, token id and event with dots."""
    cases = (
        ("abc-123", topics.CANARY_TRIGGERED, "canary.abc-123.triggered"),
        ("xyz", topics.CANARY_PLACED, "canary.xyz.placed"),
        ("xyz", topics.CANARY_REVOKED, "canary.xyz.revoked"),
    )
    for token_id, event, expected_topic in cases:
        assert topics.canary(token_id, event) == expected_topic


@pytest.mark.parametrize("bogus_id", ["", "with.dot", "with*wildcard", "with>chevron", "with space"])
def test_canary_builder_rejects_bad_token_id(bogus_id: str) -> None:
    """Each malformed token id makes ``topics.canary`` raise ``ValueError``."""
    with pytest.raises(ValueError):
        topics.canary(bogus_id, topics.CANARY_TRIGGERED)


@pytest.mark.parametrize("bogus_event", ["", "x.y", "*", ">"])
def test_canary_builder_rejects_bad_event(bogus_event: str) -> None:
    """Each malformed event segment makes ``topics.canary`` raise ``ValueError``."""
    with pytest.raises(ValueError):
        topics.canary("good_id", bogus_event)