feat(canary): package scaffolding (base/factory/paths/storage) + tests

Mirrors the decnet.intel layout (base + factory + lazy concrete
imports). Defines:

- CanaryArtifact / CanaryContext dataclasses + the generator and
  instrumenter ABCs they share
- factory dispatch for generators (git_config/env_file/ssh_key/
  aws_creds/honeydoc) and instrumenters (docx/xlsx/pdf/html/image/
  plain/passthrough), plus pick_instrumenter_for_mime() for MIME-driven
  dispatch on operator uploads
- persona-aware default placement paths (Linux vs. Windows-shaped)
  and absolute-path validation that the API will use to validate
  operator-supplied placement_path values
- on-disk blob store: sha256-keyed two-level fan-out, idempotent
  writes, refcount-aware unlink (the DB row is the source of truth)

Also covers prior commits' tests (bus topics, models, repo CRUD)
under tests/canary/. 79 tests, all pass.
This commit is contained in:
2026-04-27 12:56:01 -04:00
parent 6a0d140e91
commit 8f19adecfe
12 changed files with 989 additions and 0 deletions

37
decnet/canary/__init__.py Normal file
View File

@@ -0,0 +1,37 @@
"""Canary tokens — decoy artifacts planted in decky filesystems.
Public surface is exported here so callers can ``from decnet.canary
import CanaryArtifact, get_generator, get_instrumenter`` without
knowing the submodule layout. Concrete generators / instrumenters
live under :mod:`decnet.canary.generators` and
:mod:`decnet.canary.instrumenters` respectively; the factory keeps
import-time cost down by deferring those imports until first use
(same pattern as :mod:`decnet.intel.factory`).
"""
from __future__ import annotations
from decnet.canary.base import (
CanaryArtifact,
CanaryContext,
CanaryGenerator,
CanaryInstrumenter,
)
from decnet.canary.factory import (
KNOWN_GENERATORS,
KNOWN_INSTRUMENTERS,
get_generator,
get_instrumenter,
pick_instrumenter_for_mime,
)
# Explicit public surface of the package: every name listed here is imported
# above from ``decnet.canary.base`` or ``decnet.canary.factory``.
__all__ = [
"CanaryArtifact",
"CanaryContext",
"CanaryGenerator",
"CanaryInstrumenter",
"KNOWN_GENERATORS",
"KNOWN_INSTRUMENTERS",
"get_generator",
"get_instrumenter",
"pick_instrumenter_for_mime",
]

145
decnet/canary/base.py Normal file
View File

@@ -0,0 +1,145 @@
"""Canary generator / instrumenter ABCs and the artifact dataclass.
Two flavors of producer share the same return shape:
* :class:`CanaryGenerator` synthesises a fake artifact from scratch
(e.g. a plausible ``~/.aws/credentials`` block, a ``.git/config``
pointing at an attacker-bait remote URL). Operators don't supply
any input.
* :class:`CanaryInstrumenter` mutates an operator-uploaded blob to
embed the callback (HTTP slug + DNS host). The original blob bytes
are passed in; the instrumenter returns the mutated version.
Both return a :class:`CanaryArtifact` — the planter doesn't care
which path produced it. Same dataclass keeps the planter's
docker-exec injector trivial.
ABCs intentionally do not include I/O — generators and instrumenters
are pure functions of (slug, host, blob?). All filesystem work
happens in :mod:`decnet.canary.planter` and :mod:`decnet.canary.storage`.
"""
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Optional
@dataclass
class CanaryContext:
    """Callback coordinates handed to every generator and instrumenter.

    The slug in ``callback_token`` is embedded verbatim in HTTP URLs
    (``https://<host>/c/<callback_token>``) and used as the leftmost DNS
    label (``<callback_token>.canary.<dns_zone>``), so both trip paths
    identify the same :class:`CanaryToken` row.

    ``http_base`` and ``dns_zone`` are documented as coming from the canary
    worker's public-facing configuration (``DECNET_CANARY_HTTP_BASE`` and
    ``DECNET_CANARY_DNS_ZONE``). An empty ``dns_zone`` means DNS is not
    deployed; instrumenters whose only embed point is a hostname are
    expected to raise in that case.

    No validation is performed here: the dataclass stores whatever it is
    given.
    """

    #: Unique slug identifying the token.
    callback_token: str

    #: Public HTTP origin, e.g. "https://canary.example.test" (no trailing slash).
    http_base: str

    #: DNS zone, e.g. "canary.example.test"; "" disables DNS embeds.
    dns_zone: str = ""

    #: "linux" | "windows" — drives default username and path style.
    persona: str = "linux"
@dataclass
class CanaryArtifact:
    """What to write, where to write it, and with which file attributes.

    Both generators and instrumenters return this shape. ``generator`` and
    ``instrumenter`` are documented as mutually exclusive; the dataclass
    itself does not enforce that.
    """

    #: Absolute path inside the target container.
    path: str

    #: Final bytes that hit the decky filesystem. Always raw bytes — per the
    #: original documentation the planter base64-encodes them for the wire so
    #: binary blobs (DOCX/PNG/PDF) survive ``docker exec sh -c`` safely.
    content: bytes

    #: Unix file mode. ``0600`` by default because most realistic canary
    #: placements (``~/.aws/credentials``, ``.env``, ``id_rsa``) are
    #: operator-only. Honeydocs in user docs folders should pass ``0o644``.
    mode: int = 0o600

    #: Seconds relative to *now* for the planted file's mtime. Negative
    #: values backdate the file so it does not look freshly deployed
    #: (``-86400 * 90``, i.e. 90 days ago, is a common choice for honeydocs);
    #: ``0`` means "stamp it now".
    mtime_offset: int = 0

    #: Identifier of the instrumenter that produced this artifact (for
    #: upload-driven tokens). Mirrored into ``CanaryToken.instrumenter``.
    instrumenter: Optional[str] = None

    #: Identifier of the generator that produced this artifact (for
    #: synthesised tokens). Mirrored into ``CanaryToken.generator``.
    generator: Optional[str] = None

    #: Human-readable notes about the embedding (e.g. "DOCX: injected 1×1
    #: remote image at relsId rId99"), intended for the API ``preview``
    #: response and never for the attacker-facing surface. A fresh list is
    #: created per instance.
    notes: list[str] = field(default_factory=list)
class CanaryGenerator(ABC):
    """Interface for producers that synthesise an artifact from nothing."""

    #: Short tag — matches ``CanaryToken.generator``.
    name: str

    @abstractmethod
    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Build the artifact for ``ctx``.

        Implementations MUST NOT perform I/O and MUST be deterministic for
        the same ``(callback_token, http_base, dns_zone, persona)``:
        re-seeding from :attr:`CanaryToken.secret_seed` has to yield
        byte-identical output so that re-planting is naturally idempotent.
        """
class CanaryInstrumenter(ABC):
    """Interface for producers that embed a callback in an uploaded blob."""

    #: Short tag — matches ``CanaryToken.instrumenter``.
    name: str

    #: MIME prefixes this instrumenter handles, e.g. ``("application/pdf",)``
    #: or ``("text/",)``. Intended for content-type driven dispatch.
    #: NOTE(review): the factory visible alongside this class dispatches via
    #: its own prefix table rather than reading this attribute — confirm
    #: which one is authoritative.
    mime_prefixes: tuple[str, ...] = ()

    @abstractmethod
    def instrument(
        self,
        blob: bytes,
        ctx: CanaryContext,
        *,
        target_path: str,
    ) -> CanaryArtifact:
        """Return an artifact whose bytes are ``blob`` with the callback embedded.

        Implementations MUST raise :class:`InstrumenterRejectedError` when
        the blob cannot be mutated safely (corrupt zip, encrypted PDF, …),
        so the API can answer with a 400 and the specific reason instead of
        silently shipping the original bytes.
        """
class InstrumenterRejectedError(ValueError):
    """Signal that an instrumenter refused to mutate the supplied blob.

    Subclasses :class:`ValueError`, so existing ``except ValueError``
    handlers also catch it.
    """

129
decnet/canary/factory.py Normal file
View File

@@ -0,0 +1,129 @@
"""Generator and instrumenter factories.
Same lazy-import pattern as :mod:`decnet.intel.factory` — concrete
implementations stay un-imported until first use so importing
:mod:`decnet.canary` from a CLI subcommand doesn't drag in
``pikepdf`` / ``python-docx`` / ``Pillow`` for callers that only
need the model layer.
"""
from __future__ import annotations
from typing import Tuple
from decnet.canary.base import CanaryGenerator, CanaryInstrumenter
# Names accepted by ``get_generator``. The tuple is also interpolated into
# the ValueError message raised for an unknown name.
KNOWN_GENERATORS: Tuple[str, ...] = (
"git_config",
"env_file",
"ssh_key",
"aws_creds",
"honeydoc",
)
# Names accepted by ``get_instrumenter``. ``passthrough`` is the value
# ``pick_instrumenter_for_mime`` returns when no MIME prefix matches.
KNOWN_INSTRUMENTERS: Tuple[str, ...] = (
"docx",
"xlsx",
"pdf",
"html",
"image",
"plain",
"passthrough",
)
def get_generator(name: str) -> CanaryGenerator:
    """Instantiate the generator registered under ``name``.

    The concrete module is imported only when its branch is taken, so
    unrelated generators are never loaded.

    Raises:
        ValueError: ``name`` is not one of the known generators. Lets a
            typo in an API request surface as a 400 rather than silently
            producing nothing.
    """
    if name == "git_config":
        from decnet.canary.generators.git_config import GitConfigGenerator as impl
    elif name == "env_file":
        from decnet.canary.generators.env_file import EnvFileGenerator as impl
    elif name == "ssh_key":
        from decnet.canary.generators.ssh_key import SSHKeyGenerator as impl
    elif name == "aws_creds":
        from decnet.canary.generators.aws_creds import AWSCredsGenerator as impl
    elif name == "honeydoc":
        from decnet.canary.generators.honeydoc import HoneydocGenerator as impl
    else:
        raise ValueError(
            f"Unknown canary generator: {name!r}. Known: {KNOWN_GENERATORS}"
        )
    return impl()
def get_instrumenter(name: str) -> CanaryInstrumenter:
    """Instantiate the instrumenter registered under ``name``.

    The concrete module is imported only when its branch is taken.

    Raises:
        ValueError: ``name`` is not one of the known instrumenters.
    """
    if name == "docx":
        from decnet.canary.instrumenters.docx import DocxInstrumenter as impl
    elif name == "xlsx":
        from decnet.canary.instrumenters.xlsx import XlsxInstrumenter as impl
    elif name == "pdf":
        from decnet.canary.instrumenters.pdf import PdfInstrumenter as impl
    elif name == "html":
        from decnet.canary.instrumenters.html import HtmlInstrumenter as impl
    elif name == "image":
        from decnet.canary.instrumenters.image import ImageInstrumenter as impl
    elif name == "plain":
        from decnet.canary.instrumenters.plain import PlainInstrumenter as impl
    elif name == "passthrough":
        from decnet.canary.instrumenters.passthrough import (
            PassthroughInstrumenter as impl,
        )
    else:
        raise ValueError(
            f"Unknown canary instrumenter: {name!r}. Known: {KNOWN_INSTRUMENTERS}"
        )
    return impl()
# MIME → instrumenter dispatch. Order matters: we walk the table
# top-to-bottom and the first prefix match wins, so the more specific
# entries (DOCX/XLSX, ``text/html``) must stay above the generic ones
# (``text/``).
_MIME_DISPATCH: tuple[tuple[str, str], ...] = (
    # Office Open XML — DOCX/XLSX share a zip structure but expose
    # different inner trees, so dispatch by MIME alias rather than
    # zip-poking.
    ("application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx"),
    ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "xlsx"),
    ("application/pdf", "pdf"),
    ("text/html", "html"),
    ("application/xhtml+xml", "html"),
    ("image/png", "image"),
    ("image/jpeg", "image"),
    ("image/gif", "image"),
    # Plaintext catch-alls — config files, .env, .ini, .yaml, .json,
    # source code. All handled by the same regex-substitution pass.
    ("text/", "plain"),
    ("application/json", "plain"),
    ("application/x-yaml", "plain"),
    ("application/yaml", "plain"),
    ("application/toml", "plain"),
)


def pick_instrumenter_for_mime(content_type: str) -> str:
    """Return the instrumenter name registered for a sniffed MIME type.

    Matching is case-insensitive and by prefix, so trailing parameters such
    as ``; charset=utf-8`` do not affect the result. Surrounding whitespace
    is ignored so a padded header value is not misrouted.

    Args:
        content_type: MIME type string; may be empty.

    Returns:
        The first matching name from the dispatch table, or
        ``"passthrough"`` when the value is empty/blank or nothing matches
        (binary blobs we can't mutate safely — archives, executables,
        media). ``passthrough`` only supports DNS-callback tokens, so the
        API is expected to surface that constraint to the operator.
    """
    # Normalise once: falsy input (``""`` / ``None``) and whitespace-only
    # input both collapse to the empty string and take the fallback.
    normalized = content_type.strip().lower() if content_type else ""
    if not normalized:
        return "passthrough"
    for prefix, name in _MIME_DISPATCH:
        if normalized.startswith(prefix):
            return name
    return "passthrough"

78
decnet/canary/paths.py Normal file
View File

@@ -0,0 +1,78 @@
"""Persona-aware path resolution for canary artifacts.
Linux-persona deckies use POSIX-shaped paths under ``/home/<user>``.
"Windows" personas (still Linux containers under the hood — see
:mod:`decnet.archetypes`) use Windows-shaped paths under
``/home/<user>/AppData/...`` so an attacker browsing the filesystem
through a planted RDP/SMB session sees the right shape.
The persona lookup is best-effort: callers pass the
:attr:`decnet.archetypes.Archetype.nmap_os` value (``"linux"`` or
``"windows"``); unknown personas fall through to ``"linux"``.
Operators can always override by passing an explicit
``placement_path`` when creating a token.
"""
from __future__ import annotations
# Usernames substituted for ``{user}`` in the templates below; selected by
# ``default_user`` based on the persona.
DEFAULT_LINUX_USER = "admin"
DEFAULT_WINDOWS_USER = "Administrator"
# Canonical placements for the synthesizer-driven baseline tokens.
# Operators can override per-token via the API, but these are the
# defaults the deploy-time seed uses.
# Keys are generator names; values are ``str.format`` templates with a
# single ``{user}`` field, expanded by ``default_path_for``.
_LINUX_DEFAULTS: dict[str, str] = {
"git_config": "/home/{user}/.git/config",
"env_file": "/home/{user}/.env",
"ssh_key": "/home/{user}/.ssh/id_rsa",
"aws_creds": "/home/{user}/.aws/credentials",
"honeydoc": "/home/{user}/Documents/quarterly_report.docx",
}
# Same keys as the Linux table; still rooted at ``/home/{user}`` but with
# Windows-shaped sub-paths where they differ.
_WINDOWS_DEFAULTS: dict[str, str] = {
"git_config": "/home/{user}/AppData/Local/Programs/Git/etc/gitconfig",
"env_file": "/home/{user}/Desktop/prod.env",
"ssh_key": "/home/{user}/.ssh/id_rsa", # OpenSSH on Windows uses the same path
"aws_creds": "/home/{user}/.aws/credentials",
"honeydoc": "/home/{user}/Documents/quarterly_report.docx",
}
def default_user(persona: str) -> str:
    """Return the default username for ``persona``.

    Only the exact string ``"windows"`` selects the Windows user; every
    other value (including unknown personas) gets the Linux user.
    """
    if persona == "windows":
        return DEFAULT_WINDOWS_USER
    return DEFAULT_LINUX_USER
def default_path_for(generator: str, persona: str = "linux") -> str:
    """Resolve the default placement path for a synthesized token.

    Looks ``generator`` up in the persona's template table (Windows table
    only for the exact persona ``"windows"``, Linux table otherwise) and
    expands ``{user}`` with :func:`default_user`.

    Returns:
        The expanded absolute path, or ``/tmp/<generator>.canary`` when the
        generator has no template — better to plant *something* than to
        fail the deploy hook.
    """
    defaults = _LINUX_DEFAULTS
    if persona == "windows":
        defaults = _WINDOWS_DEFAULTS

    template = defaults.get(generator)
    if template:
        return template.format(user=default_user(persona))

    # Unknown generator — generic /tmp drop so the planter still has
    # somewhere to write. The API is documented to reject unknown
    # generators upstream, so this branch is purely defensive.
    return f"/tmp/{generator}.canary"  # nosec B108 — placement inside attacker-facing decoy container, not host /tmp
def normalize_placement(path: str) -> str:
    """Validate an operator-supplied placement path.

    Checks performed, in order:

    * the path is non-empty and starts with ``/``;
    * it contains no NUL byte;
    * it contains no ``\\n`` or ``\\r``;
    * none of its ``/``-separated segments is exactly ``..``.

    Names that merely contain dots (``/srv/backup../x``, ``/a/..hidden``)
    are accepted; only a real parent-directory segment is refused.

    Note that shell metacharacters (quotes, ``$``, ``;``, spaces, …) are
    *not* rejected here. Any caller that interpolates the result into a
    shell command line must quote it.

    Args:
        path: Candidate absolute path inside the target container.

    Returns:
        ``path`` unchanged when every check passes.

    Raises:
        ValueError: A check failed; the message names the violated rule so
            the API can return a 400 with a clear reason.
    """
    if not path or not path.startswith("/"):
        raise ValueError("placement_path must be absolute (start with '/')")
    if "\x00" in path:
        raise ValueError("placement_path may not contain NUL")
    if "\n" in path or "\r" in path:
        raise ValueError("placement_path may not contain newlines")
    # Compare whole segments: a substring test for "../" would also reject
    # ordinary file or directory names that happen to end in two dots.
    if ".." in path.split("/"):
        raise ValueError("placement_path may not contain '..' segments")
    return path

89
decnet/canary/storage.py Normal file
View File

@@ -0,0 +1,89 @@
"""Filesystem store for operator-uploaded canary blobs.
Blobs live under ``/var/lib/decnet/canary/blobs/<ab>/<cd>/<sha256>``
(two-level fan-out on the first four characters of the hash; override the
root via ``DECNET_CANARY_BLOB_DIR``) and are deduplicated by content hash.
The DB table :class:`decnet.web.db.models.CanaryBlob` mirrors
metadata; the bytes are read on demand at instrumentation time, so
the API process never holds large operator uploads in memory longer
than the request itself.
Refcount-aware deletion is enforced at the DB layer (see
:meth:`decnet.web.db.repository.BaseRepository.delete_canary_blob`);
this module only provides write/read/unlink primitives keyed by
sha256.
"""
from __future__ import annotations
import hashlib
import os
from pathlib import Path
from typing import Tuple
def blob_dir() -> Path:
    """Return the on-disk root for canary blobs.

    Honors ``DECNET_CANARY_BLOB_DIR`` so tests can point at a tmp path.
    An unset *or empty* variable selects the built-in default; an empty
    value would otherwise resolve to the process's current directory.

    The directory itself is not created here; ``write_blob`` creates the
    needed parents on first write.
    """
    raw = os.environ.get("DECNET_CANARY_BLOB_DIR") or "/var/lib/decnet/canary/blobs"
    return Path(raw)
def _path_for(sha256: str) -> Path:
    """Map a blob key to its location under :func:`blob_dir`.

    Two-level fan-out (``ab/cd/abcd...``) keeps any one directory from
    accumulating thousands of entries on busy fleets. Same shape as Git's
    loose-object store.

    Args:
        sha256: Hex digest identifying the blob. Must be at least four
            characters and consist solely of hexadecimal digits.

    Raises:
        ValueError: The key is too short or contains a non-hex character.
    """
    if len(sha256) < 4:
        raise ValueError("sha256 must be at least 4 chars")
    # The key is joined straight into a filesystem path, so anything other
    # than hex digits ("/", "..", NUL, …) could address a file outside the
    # blob root. Validate before touching the filesystem layout.
    if not all(ch in "0123456789abcdefABCDEF" for ch in sha256):
        raise ValueError("sha256 must contain only hexadecimal characters")
    root = blob_dir()
    return root / sha256[:2] / sha256[2:4] / sha256
def write_blob(content: bytes) -> Tuple[str, Path, int]:
    """Persist ``content`` under its sha256 path.

    Idempotent: when the target file already exists nothing is written
    (existence is the only check — the path is derived from the content
    hash, so an existing file is taken to hold the same bytes).

    Args:
        content: Raw blob bytes.

    Returns:
        ``(sha256, path, size_bytes)`` for the stored blob.

    Raises:
        OSError: Creating the directories, writing the temporary file, or
            renaming it into place failed. The temporary file is removed
            on a best-effort basis before the error propagates.
    """
    sha = hashlib.sha256(content).hexdigest()
    target = _path_for(sha)
    target.parent.mkdir(parents=True, exist_ok=True)
    if not target.exists():
        # Write to a sibling temp file and rename so a concurrent reader
        # never sees a half-written blob. The temp name is unique per
        # writer: two uploads of the same content would otherwise share
        # one ".part" path, and one could truncate it while the other is
        # renaming it into place.
        tmp = target.with_name(
            f"{target.name}.{os.getpid()}.{os.urandom(4).hex()}.part"
        )
        try:
            tmp.write_bytes(content)
            os.replace(tmp, target)
        except BaseException:
            # Don't leave a stray partial file behind; ignore cleanup
            # errors so the original failure is what the caller sees.
            try:
                tmp.unlink()
            except OSError:
                pass
            raise
    return sha, target, len(content)
def read_blob(sha256: str) -> bytes:
    """Return the stored bytes for the blob keyed by ``sha256``.

    Raises:
        FileNotFoundError: The on-disk file is missing, e.g. because an
            operator pruned ``/var/lib/decnet`` by hand. The caller
            (instrumenter dispatch) is expected to surface this as a
            410-ish error so the operator can re-upload.
    """
    blob_path = _path_for(sha256)
    return blob_path.read_bytes()
def unlink_blob(sha256: str) -> bool:
    """Remove the on-disk bytes for ``sha256``.

    Intended as the best-effort companion to the repository's
    ``delete_canary_blob``: it is called *after* the DB delete commits, so
    a crash in between leaves a recoverable orphan file, never a dangling
    DB reference.

    Returns:
        ``True`` when a file was removed, ``False`` when it was already
        gone.
    """
    blob_path = _path_for(sha256)
    try:
        blob_path.unlink()
    except FileNotFoundError:
        removed = False
    else:
        removed = True
    return removed

0
tests/canary/__init__.py Normal file
View File

View File

@@ -0,0 +1,87 @@
"""Coverage for the generator/instrumenter factory + MIME dispatch.
The concrete generators and instrumenters land in subsequent commits;
this file only tests the dispatch surface — it must reject unknown
names with ``ValueError`` and pick the right instrumenter for known
MIME types (with passthrough as the fallback for binary blobs we
can't safely mutate).
"""
from __future__ import annotations
import pytest
from decnet.canary.factory import (
KNOWN_GENERATORS,
KNOWN_INSTRUMENTERS,
pick_instrumenter_for_mime,
)
@pytest.mark.parametrize(
    "mime, expected",
    [
        ("application/pdf", "pdf"),
        ("application/PDF", "pdf"),  # case-insensitive
        ("application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx"),
        ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "xlsx"),
        ("text/html", "html"),
        ("application/xhtml+xml", "html"),
        ("text/plain", "plain"),
        ("text/x-yaml", "plain"),
        ("application/json", "plain"),
        ("application/yaml", "plain"),
        ("application/toml", "plain"),
        ("image/png", "image"),
        ("image/jpeg", "image"),
        ("image/gif", "image"),
    ],
)
def test_mime_dispatch_known(mime: str, expected: str) -> None:
    """Every recognised MIME type resolves to its dedicated instrumenter."""
    chosen = pick_instrumenter_for_mime(mime)
    assert chosen == expected
@pytest.mark.parametrize(
    "mime",
    [
        "",
        "application/octet-stream",
        "application/x-tar",
        "application/zip",  # bare zip — DOCX/XLSX dispatch by alias, not raw zip
        "video/mp4",
        "audio/mpeg",
    ],
)
def test_mime_dispatch_falls_back_to_passthrough(mime: str) -> None:
    """Empty or unrecognised MIME types resolve to ``passthrough``."""
    chosen = pick_instrumenter_for_mime(mime)
    assert chosen == "passthrough"
def test_known_lists_are_stable() -> None:
    """Pin the exact contents and order of both registry tuples.

    Adding or removing a generator/instrumenter must be a conscious change
    that also updates this test.
    """
    expected_generators = (
        "git_config", "env_file", "ssh_key", "aws_creds", "honeydoc",
    )
    expected_instrumenters = (
        "docx", "xlsx", "pdf", "html", "image", "plain", "passthrough",
    )
    assert KNOWN_GENERATORS == expected_generators
    assert KNOWN_INSTRUMENTERS == expected_instrumenters
def test_unknown_generator_raises() -> None:
    """An unregistered generator name is rejected with ``ValueError``."""
    from decnet.canary.factory import get_generator

    expected_message = "Unknown canary generator"
    with pytest.raises(ValueError, match=expected_message):
        get_generator("bogus")
def test_unknown_instrumenter_raises() -> None:
    """An unregistered instrumenter name is rejected with ``ValueError``."""
    from decnet.canary.factory import get_instrumenter

    expected_message = "Unknown canary instrumenter"
    with pytest.raises(ValueError, match=expected_message):
        get_instrumenter("bogus")
def test_base_artifact_dataclass_defaults() -> None:
    """A minimally constructed artifact carries the documented defaults."""
    from decnet.canary import CanaryArtifact

    artifact = CanaryArtifact(path="/x", content=b"y")
    assert artifact.mode == 0o600
    assert artifact.mtime_offset == 0
    assert artifact.notes == []
    assert artifact.generator is None
    assert artifact.instrumenter is None

View File

@@ -0,0 +1,85 @@
"""Smoke coverage for the Pydantic request/response shapes + helpers.
The tables themselves are exercised end-to-end in
:mod:`tests.canary.test_repository`; this module only covers the
helpers and request validation that don't go through the DB —
``CanaryTrigger.headers()`` JSON decoding, the
``CanaryTokenCreateRequest`` body shape, and the dump-roundtrip on
the response models.
"""
from __future__ import annotations
import pytest
from decnet.web.db.models import (
CanaryBlobResponse,
CanaryTokenCreateRequest,
CanaryTokenResponse,
CanaryTrigger,
CanaryTriggerResponse,
)
def test_create_request_minimal() -> None:
    """Optional request fields default to ``None`` when omitted."""
    request = CanaryTokenCreateRequest(
        decky_name="web1",
        kind="http",
        placement_path="/home/admin/.env",
        generator="env_file",
    )
    assert request.blob_uuid is None
    assert request.persona_path_hint is None
def test_create_request_kind_is_constrained() -> None:
    """An unsupported ``kind`` value fails request validation."""
    with pytest.raises(ValueError):
        CanaryTokenCreateRequest(
            decky_name="web1",
            kind="bogus",  # type: ignore[arg-type]
            placement_path="/x",
            generator="aws_creds",
        )
def test_trigger_headers_decode_valid_json() -> None:
    """``headers()`` decodes a JSON object stored in ``raw_headers``."""
    trigger = CanaryTrigger(
        token_uuid="t",
        src_ip="1.2.3.4",
        raw_headers='{"user-agent":"curl"}',
    )
    decoded = trigger.headers()
    assert decoded == {"user-agent": "curl"}
@pytest.mark.parametrize("raw", ["", "not json", "[1,2,3]", "null"])
def test_trigger_headers_falls_back_to_empty(raw: str) -> None:
    """Empty, invalid, or non-object JSON decodes to an empty dict."""
    trigger = CanaryTrigger(token_uuid="t", src_ip="1.2.3.4", raw_headers=raw)
    decoded = trigger.headers()
    assert decoded == {}
def test_response_models_round_trip() -> None:
    """Each response model accepts its canonical field set.

    Proves the field names and types match what the router will hand back.
    Identifiers are plain strings because the DB layer uses str UUIDs
    (project convention).
    """
    blob = CanaryBlobResponse(
        uuid="b1",
        sha256="0" * 64,
        filename="x.docx",
        content_type="application/octet-stream",
        size_bytes=1,
        uploaded_by="u1",
        uploaded_at="2026-04-27T00:00:00Z",  # type: ignore[arg-type]
        token_count=2,
    )
    assert blob.token_count == 2

    tok = CanaryTokenResponse(
        uuid="t1",
        kind="http",
        decky_name="web1",
        blob_uuid=None,
        instrumenter=None,
        generator="aws_creds",
        placement_path="/a",
        callback_token="s",
        placed_at="2026-04-27T00:00:00Z",  # type: ignore[arg-type]
        last_triggered_at=None,
        trigger_count=0,
        created_by="u1",
        state="planted",
        last_error=None,
    )
    assert tok.kind == "http"

    trig = CanaryTriggerResponse(
        uuid="x",
        token_uuid="t1",
        occurred_at="2026-04-27T00:00:00Z",  # type: ignore[arg-type]
        src_ip="1.2.3.4",
        user_agent=None,
        request_path=None,
        dns_qname=None,
        headers={},
        attacker_id=None,
    )
    assert trig.src_ip == "1.2.3.4"

View File

@@ -0,0 +1,66 @@
"""Coverage for the persona-aware path resolver + placement validator."""
from __future__ import annotations
import pytest
from decnet.canary.paths import (
DEFAULT_LINUX_USER,
DEFAULT_WINDOWS_USER,
default_path_for,
default_user,
normalize_placement,
)
def test_default_user_dispatch() -> None:
    """Personas map to their default user; unknown ones fall back to Linux."""
    linux_user = default_user("linux")
    windows_user = default_user("windows")
    assert linux_user == DEFAULT_LINUX_USER
    assert windows_user == DEFAULT_WINDOWS_USER
    # Unknown personas fall through to Linux — better to plant than fail.
    assert default_user("aix") == DEFAULT_LINUX_USER
@pytest.mark.parametrize(
    "generator, persona, expected_substr",
    [
        ("aws_creds", "linux", "/home/admin/.aws/credentials"),
        ("aws_creds", "windows", "/home/Administrator/.aws/credentials"),
        ("env_file", "linux", "/home/admin/.env"),
        ("env_file", "windows", "/home/Administrator/Desktop/prod.env"),
        ("git_config", "linux", "/home/admin/.git/config"),
        ("ssh_key", "linux", "/home/admin/.ssh/id_rsa"),
        ("honeydoc", "linux", "/home/admin/Documents/quarterly_report.docx"),
    ],
)
def test_default_path_for_known_generators(
    generator: str,
    persona: str,
    expected_substr: str,
) -> None:
    """Known generators resolve to the exact expected path per persona."""
    resolved = default_path_for(generator, persona)
    # Despite the parameter's name this is a full-string comparison.
    assert resolved == expected_substr
def test_default_path_for_unknown_generator_falls_through() -> None:
    """An unknown generator yields the defensive ``/tmp`` drop, not a crash.

    The API rejects unknown generators upstream, but the resolver must
    cope if one slips through.
    """
    resolved = default_path_for("bogus")
    assert resolved == "/tmp/bogus.canary"
def test_normalize_placement_accepts_clean_paths() -> None:
    """Well-formed absolute paths are returned unchanged."""
    for clean in ("/home/admin/.env", "/var/lib/x"):
        assert normalize_placement(clean) == clean
@pytest.mark.parametrize(
    "bad",
    [
        "",
        "relative/path",
        "./still-relative",
        "/path/with\x00nul",
        "/path/with\nnewline",
        "/path/with\rcr",
        "/path/../escape",
        "/trailing/..",
    ],
)
def test_normalize_placement_rejects_bad(bad: str) -> None:
    """Relative, control-character, and ``..`` paths raise ``ValueError``."""
    with pytest.raises(ValueError):
        normalize_placement(bad)

View File

@@ -0,0 +1,179 @@
"""Repository CRUD coverage for canary blobs / tokens / triggers.
Same harness as the rest of :mod:`tests.db` — spin up a SQLite-backed
:class:`SQLiteRepository` against a tempfile, exercise the public
methods, assert observable state.
We deliberately don't go through the API; that gets its own test
module once the router lands. This file proves the repository layer
in isolation: dedup, refcount-aware delete, slug lookup, atomic
trigger record + counter bump, attribution.
"""
from __future__ import annotations
import hashlib
from typing import AsyncIterator
import pytest
import pytest_asyncio
from decnet.web.db.sqlite.repository import SQLiteRepository
import decnet.web.db.models # noqa: F401 — registers tables on import
@pytest_asyncio.fixture
async def repo(tmp_path) -> AsyncIterator[SQLiteRepository]:
    """Yield an initialised repository backed by a database under ``tmp_path``."""
    db_file = tmp_path / "canary.db"
    repository = SQLiteRepository(str(db_file))
    await repository.initialize()
    yield repository
async def _make_blob(repo: SQLiteRepository, content: bytes, *, by: str = "u1") -> dict:
    """Upsert a DOCX-typed blob row describing ``content`` and return it.

    Only metadata is stored; the sha256 and size are derived from
    ``content`` and ``by`` is recorded as the uploader.
    """
    digest = hashlib.sha256(content).hexdigest()
    row = {
        "sha256": digest,
        "filename": "report.docx",
        "content_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "size_bytes": len(content),
        "uploaded_by": by,
    }
    return await repo.upsert_canary_blob(row)
@pytest.mark.asyncio
async def test_upsert_blob_dedupes_by_sha256(repo: SQLiteRepository) -> None:
    """Identical bytes share one row; different bytes get a new row."""
    first = await _make_blob(repo, b"same bytes", by="u1")
    second = await _make_blob(repo, b"same bytes", by="u2")
    assert first["uuid"] == second["uuid"], "second upload must return the canonical row"

    # Different bytes → different blob.
    other = await _make_blob(repo, b"different bytes", by="u1")
    assert other["uuid"] != first["uuid"]
@pytest.mark.asyncio
async def test_upsert_blob_requires_sha256(repo: SQLiteRepository) -> None:
    """Upserting a row without a ``sha256`` key raises ``ValueError``."""
    row_without_hash = {
        "filename": "x",
        "content_type": "x",
        "size_bytes": 0,
        "uploaded_by": "u",
    }
    with pytest.raises(ValueError):
        await repo.upsert_canary_blob(row_without_hash)
@pytest.mark.asyncio
async def test_get_blob_by_sha256(repo: SQLiteRepository) -> None:
    """Lookup by hash finds the stored row and returns ``None`` otherwise."""
    blob = await _make_blob(repo, b"x")

    found = await repo.get_canary_blob_by_sha256(blob["sha256"])
    assert found is not None
    assert found["uuid"] == blob["uuid"]

    missing = await repo.get_canary_blob_by_sha256("0" * 64)
    assert missing is None
@pytest.mark.asyncio
async def test_list_blobs_carries_token_count(repo: SQLiteRepository) -> None:
    """Listed blobs report how many tokens reference them."""
    blob = await _make_blob(repo, b"x")

    before = await repo.list_canary_blobs()
    assert len(before) == 1
    assert before[0]["token_count"] == 0

    token_row = {
        "kind": "http", "decky_name": "web1", "blob_uuid": blob["uuid"],
        "instrumenter": "docx", "placement_path": "/tmp/x.docx",
        "callback_token": "slug-1", "secret_seed": "s", "created_by": "u1",
    }
    await repo.create_canary_token(token_row)

    after = await repo.list_canary_blobs()
    assert after[0]["token_count"] == 1
@pytest.mark.asyncio
async def test_delete_blob_refuses_while_referenced(repo: SQLiteRepository) -> None:
    """A blob cannot be deleted while a token row still points at it."""
    blob = await _make_blob(repo, b"x")
    token_row = {
        "kind": "http", "decky_name": "web1", "blob_uuid": blob["uuid"],
        "instrumenter": "docx", "placement_path": "/tmp/x.docx",
        "callback_token": "slug-r", "secret_seed": "s", "created_by": "u1",
    }
    await repo.create_canary_token(token_row)

    deleted = await repo.delete_canary_blob(blob["uuid"])
    assert deleted is False

    # Even after revoke, the row still references the blob — operator
    # must explicitly clean tokens before they can prune the blob.
    tok = await repo.get_canary_token_by_slug("slug-r")
    await repo.update_canary_token_state(tok["uuid"], "revoked")
    deleted_after_revoke = await repo.delete_canary_blob(blob["uuid"])
    assert deleted_after_revoke is False
@pytest.mark.asyncio
async def test_delete_blob_returns_false_for_missing(repo: SQLiteRepository) -> None:
    """Deleting an unknown blob uuid reports ``False``."""
    unknown_uuid = "00000000-0000-0000-0000-000000000000"
    deleted = await repo.delete_canary_blob(unknown_uuid)
    assert deleted is False
@pytest.mark.asyncio
async def test_token_slug_lookup(repo: SQLiteRepository) -> None:
    """Tokens are retrievable by callback slug; unknown slugs give ``None``."""
    token_row = {
        "kind": "http", "decky_name": "web1", "generator": "aws_creds",
        "placement_path": "/home/admin/.aws/credentials",
        "callback_token": "slug-aws", "secret_seed": "s", "created_by": "u1",
    }
    await repo.create_canary_token(token_row)

    found = await repo.get_canary_token_by_slug("slug-aws")
    assert found is not None
    assert found["decky_name"] == "web1"

    missing = await repo.get_canary_token_by_slug("nonexistent")
    assert missing is None
@pytest.mark.asyncio
async def test_list_tokens_filters(repo: SQLiteRepository) -> None:
    """Listing supports filtering by decky name, kind, and state."""
    seeds = (
        ("http", "web1", "/a", "s1"),
        ("dns", "web2", "/b", "s2"),
    )
    for kind, decky, placement, slug in seeds:
        await repo.create_canary_token({
            "kind": kind, "decky_name": decky, "generator": "aws_creds",
            "placement_path": placement, "callback_token": slug,
            "secret_seed": "s", "created_by": "u1",
        })

    everything = await repo.list_canary_tokens()
    assert len(everything) == 2
    by_decky = await repo.list_canary_tokens(decky_name="web1")
    assert len(by_decky) == 1
    by_kind = await repo.list_canary_tokens(kind="dns")
    assert len(by_kind) == 1
    by_state = await repo.list_canary_tokens(state="revoked")
    assert len(by_state) == 0
@pytest.mark.asyncio
async def test_record_trigger_bumps_counters_atomically(repo: SQLiteRepository) -> None:
    """Recording a trigger increments the count and stamps the token."""
    await repo.create_canary_token({
        "kind": "http", "decky_name": "web1", "generator": "aws_creds",
        "placement_path": "/a", "callback_token": "slug-c",
        "secret_seed": "s", "created_by": "u1",
    })
    before = await repo.get_canary_token_by_slug("slug-c")
    assert before["trigger_count"] == 0
    assert before["last_triggered_at"] is None

    trigger_row = {
        "token_uuid": before["uuid"], "src_ip": "1.2.3.4",
        "request_path": "/c/slug-c", "user_agent": "curl/8.0",
        "raw_headers": {"user-agent": "curl/8.0"},
    }
    trig_id = await repo.record_canary_trigger(trigger_row)
    assert trig_id

    after = await repo.get_canary_token_by_slug("slug-c")
    assert after["trigger_count"] == 1
    assert after["last_triggered_at"] is not None

    # The trigger row itself is listed against the token. (Header decoding
    # is not asserted here.)
    triggers = await repo.list_canary_triggers(before["uuid"])
    assert len(triggers) == 1
    assert triggers[0]["src_ip"] == "1.2.3.4"
@pytest.mark.asyncio
async def test_attribute_trigger_sets_attacker(repo: SQLiteRepository) -> None:
    """Attribution stores the attacker id; unknown trigger ids give ``False``."""
    await repo.create_canary_token({
        "kind": "http", "decky_name": "web1", "generator": "aws_creds",
        "placement_path": "/a", "callback_token": "slug-at",
        "secret_seed": "s", "created_by": "u1",
    })
    tok = await repo.get_canary_token_by_slug("slug-at")
    trig_id = await repo.record_canary_trigger({
        "token_uuid": tok["uuid"], "src_ip": "9.9.9.9",
    })

    attributed = await repo.attribute_canary_trigger(trig_id, "attacker-uuid-123")
    assert attributed is True
    attributed_missing = await repo.attribute_canary_trigger("missing-trig", "x")
    assert attributed_missing is False

    triggers = await repo.list_canary_triggers(tok["uuid"])
    assert triggers[0]["attacker_id"] == "attacker-uuid-123"
@pytest.mark.asyncio
async def test_get_token_returns_none_for_missing(repo: SQLiteRepository) -> None:
    """Lookups by an unknown uuid return ``None`` for tokens and blobs."""
    unknown_uuid = "00000000-0000-0000-0000-000000000000"
    token = await repo.get_canary_token(unknown_uuid)
    assert token is None
    blob = await repo.get_canary_blob(unknown_uuid)
    assert blob is None
@pytest.mark.asyncio
async def test_update_state_returns_false_for_missing(repo: SQLiteRepository) -> None:
    """Updating the state of an unknown token reports ``False``."""
    updated = await repo.update_canary_token_state("missing", "revoked")
    assert updated is False

View File

@@ -0,0 +1,52 @@
"""Coverage for the on-disk blob store."""
from __future__ import annotations
import hashlib
from decnet.canary import storage
def test_write_blob_is_idempotent(tmp_path, monkeypatch) -> None:
    """Writing the same bytes twice yields the same hash, path, and size."""
    monkeypatch.setenv("DECNET_CANARY_BLOB_DIR", str(tmp_path))
    payload = b"hello canary"

    sha1, p1, sz1 = storage.write_blob(payload)
    sha2, p2, sz2 = storage.write_blob(payload)

    assert sha1 == sha2 == hashlib.sha256(b"hello canary").hexdigest()
    assert p1 == p2
    assert sz1 == sz2 == len(b"hello canary")

    # Two-level fan-out: ab/cd/abcd...
    second_level = p1.parent
    first_level = second_level.parent
    assert first_level.parent == tmp_path
    assert second_level.name == sha1[2:4]
    assert first_level.name == sha1[:2]
def test_read_blob_returns_bytes(tmp_path, monkeypatch) -> None:
    """A written blob reads back byte-for-byte."""
    monkeypatch.setenv("DECNET_CANARY_BLOB_DIR", str(tmp_path))
    sha, _path, _size = storage.write_blob(b"some payload")
    stored = storage.read_blob(sha)
    assert stored == b"some payload"
def test_unlink_blob_returns_false_for_missing(tmp_path, monkeypatch) -> None:
    """Unlinking a blob that was never written reports ``False``."""
    monkeypatch.setenv("DECNET_CANARY_BLOB_DIR", str(tmp_path))
    never_written = "0" * 64
    removed = storage.unlink_blob(never_written)
    assert removed is False
def test_unlink_blob_removes_file(tmp_path, monkeypatch) -> None:
    """Unlink deletes the file once and is a no-op the second time."""
    monkeypatch.setenv("DECNET_CANARY_BLOB_DIR", str(tmp_path))
    sha, path, _size = storage.write_blob(b"to be removed")
    assert path.exists()

    first_attempt = storage.unlink_blob(sha)
    assert first_attempt is True
    assert not path.exists()

    # Second unlink is a no-op rather than a crash.
    second_attempt = storage.unlink_blob(sha)
    assert second_attempt is False
def test_blob_dir_honors_env(monkeypatch, tmp_path) -> None:
    """``blob_dir`` returns the directory named by the environment variable."""
    override = tmp_path / "alt"
    monkeypatch.setenv("DECNET_CANARY_BLOB_DIR", str(override))
    assert storage.blob_dir() == override
def test_short_sha_rejected() -> None:
    """Keys shorter than four characters are rejected with ``ValueError``."""
    import pytest

    too_short = "abc"
    with pytest.raises(ValueError):
        storage._path_for(too_short)

View File

@@ -0,0 +1,42 @@
"""Coverage for the canary bus-topic builder + constants.
The builder shares :func:`_reject_tokens` with every other family in
:mod:`decnet.bus.topics`, so we only need to exercise the canary
surface: the three leaf constants and that bogus segments are
rejected. Anything more would duplicate :mod:`tests.bus.test_topics`.
"""
from __future__ import annotations
import pytest
from decnet.bus import topics
def test_canary_constants_are_distinct() -> None:
    """Pin the canary topic constants and check the three leaves differ."""
    assert topics.CANARY == "canary"
    assert topics.CANARY_PLACED == "placed"
    assert topics.CANARY_TRIGGERED == "triggered"
    assert topics.CANARY_REVOKED == "revoked"

    leaves = {
        topics.CANARY_PLACED,
        topics.CANARY_TRIGGERED,
        topics.CANARY_REVOKED,
    }
    assert len(leaves) == 3
def test_canary_builder_round_trip() -> None:
    """The builder joins family, token id, and event with dots."""
    triggered = topics.canary("abc-123", topics.CANARY_TRIGGERED)
    assert triggered == "canary.abc-123.triggered"
    placed = topics.canary("xyz", topics.CANARY_PLACED)
    assert placed == "canary.xyz.placed"
    revoked = topics.canary("xyz", topics.CANARY_REVOKED)
    assert revoked == "canary.xyz.revoked"
@pytest.mark.parametrize(
    "bogus_id",
    ["", "with.dot", "with*wildcard", "with>chevron", "with space"],
)
def test_canary_builder_rejects_bad_token_id(bogus_id: str) -> None:
    """Token ids that are empty or contain separators/wildcards are rejected."""
    with pytest.raises(ValueError):
        topics.canary(bogus_id, topics.CANARY_TRIGGERED)
@pytest.mark.parametrize(
    "bogus_event",
    ["", "x.y", "*", ">"],
)
def test_canary_builder_rejects_bad_event(bogus_event: str) -> None:
    """Event segments that are empty, dotted, or wildcards are rejected."""
    with pytest.raises(ValueError):
        topics.canary("good_id", bogus_event)