feat(creds): cred-reuse foundation + vectorstore scaffold

Lays the storage and bus substrate for the "credential reuse patterns"
task in DEVELOPMENT.md and scaffolds decnet/vectorstore/ as the future
substrate for statistical attacker re-identification over behavioral
fingerprints. No correlator, profiler, API, or dashboard wiring in
this commit — see TODO.md for the handoff.

Schema:
  - Credential.attacker_uuid (nullable FK to attackers.uuid),
    backfilled by the profiler post-write to avoid coupling the
    capture path to the profiler's ordering.
  - CredentialReuse table — UUID PK, JSON list columns for the
    accumulating attacker_uuids/ips/deckies/services, target_count
    (the discriminative scalar), confidence reserved for a future
    fuzzy-credential pass.

Repo:
  - upsert_credential_reuse / list_credential_reuses /
    get_credential_reuse_by_id / update_credential_attacker_uuid.
  - Renamed pre-existing get_credential_reuse(secret_sha256) to
    get_credential_attempts_for_secret(secret_sha256) — the new
    findings table needs the cleaner name.

Bus topics:
  - credential.captured (one per Credential upsert)
  - credential.reuse.detected (correlator-emitted on insert/grow)

Vectorstore subpackage (decnet/vectorstore/, flat layout mirroring
decnet/bus/):
  - BaseVectorStore ABC keyed by (kind, id) — kind discriminator
    means new feature families are additive, no schema migration.
  - FakeVectorStore (in-memory L2 KNN), NullVectorStore (no-op for
    DECNET_VECTORSTORE_ENABLED=false), SqliteVecVectorStore (lazy
    sqlite_vec extension load, one vec0 virtual table per kind).
  - get_vectorstore() env-driven dispatch with graceful fallback
    to FakeVectorStore when the sqlite-vec extension isn't on the
    host, so workers don't crash on a missing optional dep.

Tests: 26 new (11 cred-reuse repo, 15 vectorstore). Existing
credentials and base-repo tests updated for the rename. Total: 34
passing on the touched files.
This commit is contained in:
2026-04-26 03:18:34 -04:00
parent 817ce32e6d
commit ce4be68501
17 changed files with 1615 additions and 11 deletions

View File

@@ -23,7 +23,11 @@ class DummyRepo(BaseRepository):
async def get_credentials(self, **kw): await super().get_credentials(**kw)
async def get_total_credentials(self, **kw): await super().get_total_credentials(**kw)
async def get_credentials_for_attacker(self, ip): await super().get_credentials_for_attacker(ip)
async def get_credential_reuse(self, h): await super().get_credential_reuse(h)
async def get_credential_attempts_for_secret(self, h): await super().get_credential_attempts_for_secret(h)
async def upsert_credential_reuse(self, **kw): await super().upsert_credential_reuse(**kw); return None
async def list_credential_reuses(self, **kw): await super().list_credential_reuses(**kw); return (0, [])
async def get_credential_reuse_by_id(self, i): await super().get_credential_reuse_by_id(i)
async def update_credential_attacker_uuid(self, ip, u): await super().update_credential_attacker_uuid(ip, u); return 0
async def get_state(self, k): await super().get_state(k)
async def set_state(self, k, v): await super().set_state(k, v)
async def get_max_log_id(self): await super().get_max_log_id()
@@ -73,7 +77,15 @@ async def test_base_repo_coverage():
await dr.get_credentials()
await dr.get_total_credentials()
await dr.get_credentials_for_attacker("1.2.3.4")
await dr.get_credential_reuse("abc")
await dr.get_credential_attempts_for_secret("abc")
await dr.upsert_credential_reuse(
secret_sha256="x", secret_kind="plaintext", principal=None,
attacker_uuid=None, attacker_ip="1.2.3.4", decky="d", service="ssh",
attempt_count=1, ts=None,
)
await dr.list_credential_reuses()
await dr.get_credential_reuse_by_id("a")
await dr.update_credential_attacker_uuid("1.2.3.4", "u")
await dr.get_state("k")
await dr.set_state("k", "v")
await dr.get_max_log_id()

View File

@@ -0,0 +1,226 @@
"""CredentialReuse repo tests — upsert idempotency, list pagination, FK backfill."""
from __future__ import annotations
import hashlib
from pathlib import Path
import pytest
from decnet.web.db.factory import get_repository
@pytest.fixture
async def repo(tmp_path: Path):
    """Provide an initialized repository backed by a per-test database file.

    The database lives at ``reuse.db`` inside pytest's ``tmp_path``, so
    every test starts from an empty store.
    """
    db_file = tmp_path / "reuse.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
def _sha256(s: str) -> str:
    """Return the lowercase hex SHA-256 digest of *s* encoded as UTF-8."""
    hasher = hashlib.sha256()
    hasher.update(s.encode("utf-8"))
    return hasher.hexdigest()
async def _seed_credential(repo, **overrides):
    """Upsert one Credential row built from test defaults plus *overrides*.

    Each keyword argument replaces the default of the same name (or adds a
    new key) before the row is handed to ``repo.upsert_credential``; that
    call's result is returned unchanged.
    """
    defaults = {
        "attacker_ip": "10.0.0.5",
        "decky_name": "decky-01",
        "service": "ssh",
        "principal": "root",
        "secret_sha256": _sha256("hunter2"),
        "secret_b64": "aHVudGVyMg==",
        "secret_printable": "hunter2",
        "fields": {},
    }
    # Later keys win, so caller overrides take precedence over defaults.
    row = {**defaults, **overrides}
    return await repo.upsert_credential(row)
@pytest.mark.anyio
async def test_upsert_inserts_first_observation(repo) -> None:
    """The first upsert for a secret reports a freshly inserted finding."""
    digest = _sha256("hunter2")
    finding = await repo.upsert_credential_reuse(
        secret_sha256=digest,
        secret_kind="plaintext",
        principal="root",
        attacker_uuid=None,
        attacker_ip="10.0.0.5",
        decky="decky-01",
        service="ssh",
        attempt_count=1,
    )
    assert finding is not None
    assert finding["inserted"] is True
    # Scalar fields expected on a brand-new finding.
    for key, expected in (("target_count", 1), ("confidence", 1.0)):
        assert finding[key] == expected
@pytest.mark.anyio
async def test_upsert_grows_target_count_across_services(repo) -> None:
    """One secret seen on two distinct (decky, service) pairs yields target_count 2.

    target_count is recomputed from the credentials table, so real
    Credential rows are seeded before the reuse upserts run.
    """
    digest = _sha256("p4ssw0rd")
    targets = (("d1", "ssh"), ("d2", "ftp"))

    for decky, service in targets:
        await _seed_credential(
            repo, secret_sha256=digest, decky_name=decky, service=service,
        )

    # Only the result of the last (second) upsert is inspected below.
    latest = None
    for decky, service in targets:
        latest = await repo.upsert_credential_reuse(
            secret_sha256=digest,
            secret_kind="plaintext",
            principal="root",
            attacker_uuid=None,
            attacker_ip="10.0.0.5",
            decky=decky,
            service=service,
            attempt_count=1,
        )

    assert latest["inserted"] is False
    assert latest["changed"] is True
    assert latest["target_count"] == 2
@pytest.mark.anyio
async def test_upsert_dedups_same_decky_service(repo) -> None:
    """Re-upserting the same (decky, service) keeps target_count at 1.

    Three identical upserts must collapse into a single finding whose
    attempt_count is 3.
    """
    digest = _sha256("samepw")
    await _seed_credential(repo, secret_sha256=digest)

    observation = {
        "secret_sha256": digest,
        "secret_kind": "plaintext",
        "principal": "root",
        "attacker_uuid": None,
        "attacker_ip": "10.0.0.5",
        "decky": "decky-01",
        "service": "ssh",
        "attempt_count": 1,
    }
    for _attempt in range(3):
        await repo.upsert_credential_reuse(**observation)

    listing = await repo.list_credential_reuses(min_target_count=1)
    findings = listing[1]
    assert len(findings) == 1
    only = findings[0]
    assert only["target_count"] == 1
    assert only["attempt_count"] == 3
@pytest.mark.anyio
async def test_upsert_merges_attacker_lists(repo) -> None:
    """Distinct attacker UUIDs and IPs accumulate on the same finding."""
    digest = _sha256("shared")
    await _seed_credential(repo, secret_sha256=digest, attacker_ip="1.1.1.1")
    await _seed_credential(
        repo, secret_sha256=digest, attacker_ip="2.2.2.2", decky_name="d2",
    )

    # (attacker_uuid, attacker_ip, decky) for each reuse observation.
    observations = (
        ("uuid-A", "1.1.1.1", "decky-01"),
        ("uuid-B", "2.2.2.2", "d2"),
    )
    for attacker_uuid, attacker_ip, decky in observations:
        await repo.upsert_credential_reuse(
            secret_sha256=digest,
            secret_kind="plaintext",
            principal="root",
            attacker_uuid=attacker_uuid,
            attacker_ip=attacker_ip,
            decky=decky,
            service="ssh",
            attempt_count=1,
        )

    listing = await repo.list_credential_reuses(min_target_count=1)
    first = listing[1][0]
    assert set(first["attacker_uuids"]) == {"uuid-A", "uuid-B"}
    assert set(first["attacker_ips"]) == {"1.1.1.1", "2.2.2.2"}
@pytest.mark.anyio
async def test_null_principal_uniqueness(repo) -> None:
    """Upserts with principal=None land on one shared row, not one row each."""
    digest = _sha256("redis-auth")
    await _seed_credential(
        repo, secret_sha256=digest, service="redis", principal=None,
    )

    observation = dict(
        secret_sha256=digest,
        secret_kind="plaintext",
        principal=None,
        attacker_uuid=None,
        attacker_ip="1.1.1.1",
        decky="decky-01",
        service="redis",
        attempt_count=1,
    )
    for _attempt in range(2):
        await repo.upsert_credential_reuse(**observation)

    listing = await repo.list_credential_reuses(min_target_count=1)
    findings = listing[1]
    assert len(findings) == 1
    assert findings[0]["principal"] is None
@pytest.mark.anyio
async def test_list_filters_by_min_target_count(repo) -> None:
    """A single-target finding is hidden at min_target_count=2 and shown at 1."""
    digest = _sha256("only-once")
    await _seed_credential(repo, secret_sha256=digest)
    await repo.upsert_credential_reuse(
        secret_sha256=digest,
        secret_kind="plaintext",
        principal="root",
        attacker_uuid=None,
        attacker_ip="1.1.1.1",
        decky="decky-01",
        service="ssh",
        attempt_count=1,
    )

    # Threshold above the finding's target_count: nothing is returned.
    hidden_total, hidden_rows = await repo.list_credential_reuses(
        min_target_count=2,
    )
    assert hidden_total == 0
    assert hidden_rows == []

    # Threshold at the finding's target_count: it is counted.
    visible_total, _rows = await repo.list_credential_reuses(min_target_count=1)
    assert visible_total == 1
@pytest.mark.anyio
async def test_list_pagination_orders_by_target_count_desc(repo) -> None:
    """Findings are listed with the highest target_count first.

    Secret "a" is observed on one (decky, service) target and secret "b"
    on two, so "b" must be the first row returned.

    NOTE(review): despite the name, no limit/offset arguments are
    exercised here — only the default ordering is checked.
    """
    sha_a = _sha256("a")
    sha_b = _sha256("b")

    async def _report(secret_sha256: str, decky: str, service: str) -> None:
        """Record one reuse observation with the fields shared by this test."""
        await repo.upsert_credential_reuse(
            secret_sha256=secret_sha256,
            secret_kind="plaintext",
            principal="root",
            attacker_uuid=None,
            attacker_ip="1.1.1.1",
            decky=decky,
            service=service,
            attempt_count=1,
        )

    # secret a → 1 target. The reported decky matches the seeded Credential
    # row (the helper's default "decky-01"); the original reported "d1",
    # which was never seeded.
    await _seed_credential(repo, secret_sha256=sha_a)
    await _report(sha_a, "decky-01", "ssh")

    # secret b → 2 targets
    await _seed_credential(repo, secret_sha256=sha_b, service="ssh")
    await _seed_credential(
        repo, secret_sha256=sha_b, service="ftp", decky_name="d2",
    )
    await _report(sha_b, "decky-01", "ssh")
    await _report(sha_b, "d2", "ftp")

    total, rows = await repo.list_credential_reuses(min_target_count=1)
    assert total == 2
    assert rows[0]["secret_sha256"] == sha_b  # higher target_count first
@pytest.mark.anyio
async def test_get_by_id_roundtrip(repo) -> None:
    """A finding can be fetched back by the id returned from its upsert."""
    digest = _sha256("rt")
    await _seed_credential(repo, secret_sha256=digest)
    created = await repo.upsert_credential_reuse(
        secret_sha256=digest,
        secret_kind="plaintext",
        principal="root",
        attacker_uuid=None,
        attacker_ip="1.1.1.1",
        decky="decky-01",
        service="ssh",
        attempt_count=1,
    )

    finding_id = created["id"]
    loaded = await repo.get_credential_reuse_by_id(finding_id)
    assert loaded is not None
    assert loaded["id"] == finding_id
    assert loaded["secret_sha256"] == digest
    # The deckies column must come back as a Python list.
    assert isinstance(loaded["deckies"], list)
@pytest.mark.anyio
async def test_get_by_id_missing_returns_none(repo) -> None:
    """Looking up an id that was never stored yields None."""
    missing = await repo.get_credential_reuse_by_id("nope")
    assert missing is None
@pytest.mark.anyio
async def test_update_credential_attacker_uuid_backfills_only_nulls(repo) -> None:
    """Backfill sets attacker_uuid only on rows where it is still null.

    The profiler hook must never overwrite a UUID that is already
    present, so a second backfill with a different UUID changes nothing.
    """
    sha = _sha256("backfill")
    await _seed_credential(repo, secret_sha256=sha, attacker_ip="9.9.9.9")
    await _seed_credential(
        repo, secret_sha256=sha, attacker_ip="9.9.9.9",
        service="ftp", decky_name="d2",
    )

    # Backfill: both null, both should update.
    n = await repo.update_credential_attacker_uuid("9.9.9.9", "uuid-9")
    assert n == 2

    # Second call: both already set, nothing should change.
    n2 = await repo.update_credential_attacker_uuid("9.9.9.9", "uuid-other")
    assert n2 == 0

    rows = await repo.get_credentials_for_attacker("9.9.9.9")
    stored_uuids = [r["attacker_uuid"] for r in rows]
    # Set equality instead of all(): all() over an empty result is vacuously
    # true and would hide a lookup that returned no rows.
    assert set(stored_uuids) == {"uuid-9"}
@pytest.mark.anyio
async def test_update_credential_attacker_uuid_no_match(repo) -> None:
    """Backfilling an IP with no stored credentials reports zero updated rows."""
    updated = await repo.update_credential_attacker_uuid("0.0.0.0", "uuid-x")
    assert updated == 0

View File

@@ -101,7 +101,7 @@ async def test_cross_service_reuse_query(repo) -> None:
"secret_printable": secret,
"fields": {},
})
reuse = await repo.get_credential_reuse(sha)
reuse = await repo.get_credential_attempts_for_secret(sha)
assert {r["service"] for r in reuse} == {"ssh", "ftp", "smtp"}

View File

View File

@@ -0,0 +1,66 @@
"""Tests for :func:`decnet.vectorstore.factory.get_vectorstore` dispatch."""
from __future__ import annotations
import os
import pytest
from decnet.vectorstore.factory import _default_db_path, get_vectorstore
from decnet.vectorstore.fake import FakeVectorStore, NullVectorStore
def test_disabled_returns_null(monkeypatch: pytest.MonkeyPatch) -> None:
    """With the vectorstore disabled, the factory hands back a NullVectorStore."""
    environment = {
        "DECNET_VECTORSTORE_ENABLED": "false",
        "DECNET_VECTORSTORE_TYPE": "sqlite_vec",  # ignored when disabled
    }
    for variable, value in environment.items():
        monkeypatch.setenv(variable, value)
    store = get_vectorstore()
    assert isinstance(store, NullVectorStore)
def test_fake_dispatch(monkeypatch: pytest.MonkeyPatch) -> None:
    """An enabled vectorstore of type "fake" resolves to FakeVectorStore."""
    environment = {
        "DECNET_VECTORSTORE_ENABLED": "true",
        "DECNET_VECTORSTORE_TYPE": "fake",
    }
    for variable, value in environment.items():
        monkeypatch.setenv(variable, value)
    store = get_vectorstore()
    assert isinstance(store, FakeVectorStore)
def test_sqlite_vec_falls_back_to_fake_when_extension_missing(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Requesting sqlite_vec while its import fails must yield a FakeVectorStore.

    The factory is expected to degrade gracefully instead of crashing
    workers on a missing optional dependency. Only the returned type is
    asserted here.
    """
    monkeypatch.setenv("DECNET_VECTORSTORE_ENABLED", "true")
    monkeypatch.setenv("DECNET_VECTORSTORE_TYPE", "sqlite_vec")

    # Make importing sqlite_vec fail no matter what is installed, so the
    # outcome is the same on dev boxes that do have the extension.
    import builtins

    original_import = builtins.__import__

    def _deny_sqlite_vec(name, *args, **kwargs):  # noqa: ANN001
        if name != "sqlite_vec":
            return original_import(name, *args, **kwargs)
        raise ImportError("forced")

    monkeypatch.setattr(builtins, "__import__", _deny_sqlite_vec)

    store = get_vectorstore()
    assert isinstance(store, FakeVectorStore)
def test_unknown_type_raises(monkeypatch: pytest.MonkeyPatch) -> None:
    """An unrecognized DECNET_VECTORSTORE_TYPE is rejected with ValueError."""
    environment = {
        "DECNET_VECTORSTORE_ENABLED": "true",
        "DECNET_VECTORSTORE_TYPE": "qdrant",
    }
    for variable, value in environment.items():
        monkeypatch.setenv(variable, value)
    expectation = pytest.raises(ValueError, match="Unsupported vectorstore type")
    with expectation:
        get_vectorstore()
def test_default_db_path_honors_env(monkeypatch: pytest.MonkeyPatch) -> None:
    """DECNET_VECTORSTORE_PATH, when set, is returned verbatim."""
    monkeypatch.setenv("DECNET_VECTORSTORE_PATH", "/tmp/explicit.sqlite")
    resolved = _default_db_path()
    assert resolved == "/tmp/explicit.sqlite"
def test_default_db_path_falls_back_to_home(monkeypatch: pytest.MonkeyPatch) -> None:
    """Without an explicit path, the default lives under ~/.decnet/."""
    monkeypatch.delenv("DECNET_VECTORSTORE_PATH", raising=False)
    # Report every directory as absent.
    # NOTE(review): presumably this forces the factory past any
    # directory-based candidate — confirm against _default_db_path.
    monkeypatch.setattr("os.path.isdir", lambda p: False)
    resolved = _default_db_path()
    assert resolved.endswith(".decnet/vectors.sqlite")
    home = os.path.expanduser("~")
    assert resolved.startswith(home)

View File

@@ -0,0 +1,113 @@
"""Tests for :class:`FakeVectorStore` and :class:`NullVectorStore`.
The fake is the reference implementation of the BaseVectorStore
contract — every behavior assertion here doubles as a contract test
that any future backend must satisfy.
"""
from __future__ import annotations
import pytest
from decnet.vectorstore.fake import FakeVectorStore, NullVectorStore
@pytest.mark.anyio
async def test_fake_round_trip() -> None:
    """A stored vector is returned by get() with its kind, id, dim and values."""
    store = FakeVectorStore()
    await store.initialize()

    samples = (
        ("sess-1", [1.0, 0.0, 0.0]),
        ("sess-2", [0.9, 0.1, 0.0]),
        ("sess-3", [0.0, 1.0, 0.0]),
    )
    for vector_id, vector in samples:
        await store.insert("ja3", vector_id, vector)

    record = await store.get("ja3", "sess-1")
    assert record is not None
    assert record.kind == "ja3"
    assert record.id == "sess-1"
    assert record.dim == 3
    # Compared as a tuple so the stored sequence type does not matter.
    assert tuple(record.vector) == (1.0, 0.0, 0.0)
@pytest.mark.anyio
async def test_fake_knn_orders_by_distance() -> None:
    """knn() returns neighbours sorted by ascending distance to the query.

    Ids are labelled by their relation to the query vector [1.0, 0.0]:
    "exact" is identical to it, "near" is slightly off, "far" is
    orthogonal. Insertion order deliberately differs from distance order.
    """
    s = FakeVectorStore()
    await s.initialize()
    await s.insert("ja3", "exact", [1.0, 0.0])
    await s.insert("ja3", "far", [0.0, 1.0])
    await s.insert("ja3", "near", [0.99, 0.01])

    n = await s.knn("ja3", [1.0, 0.0], k=3)
    assert [x.id for x in n] == ["exact", "near", "far"]
    assert n[0].distance == 0.0
    # Distances must increase strictly along the result list.
    assert n[1].distance > n[0].distance
    assert n[2].distance > n[1].distance
@pytest.mark.anyio
async def test_fake_knn_unknown_kind_returns_empty() -> None:
    """Querying a kind that has no vectors returns an empty list."""
    store = FakeVectorStore()
    await store.initialize()
    neighbours = await store.knn("never_seen", [0.1, 0.2])
    assert neighbours == []
@pytest.mark.anyio
async def test_fake_dim_mismatch_raises() -> None:
    """Inserting a vector whose length differs from the kind's is rejected."""
    store = FakeVectorStore()
    await store.initialize()
    # The first insert is 3-dimensional; the second is 2-dimensional.
    await store.insert("hassh", "a", [1.0, 2.0, 3.0])
    expectation = pytest.raises(ValueError, match="dim mismatch")
    with expectation:
        await store.insert("hassh", "b", [1.0, 2.0])
@pytest.mark.anyio
async def test_fake_knn_query_dim_mismatch_raises() -> None:
    """A knn() query whose length differs from the stored vectors raises ValueError."""
    store = FakeVectorStore()
    await store.initialize()
    await store.insert("kd", "a", [0.1, 0.2, 0.3])
    # NOTE(review): no message pattern is checked here, unlike the
    # insert-side dim-mismatch test — confirm whether that is intentional.
    expectation = pytest.raises(ValueError)
    with expectation:
        await store.knn("kd", [0.1, 0.2])
@pytest.mark.anyio
async def test_fake_replace_existing_id() -> None:
    """Inserting under an existing (kind, id) replaces the stored vector."""
    s = FakeVectorStore()
    await s.initialize()
    await s.insert("k", "id1", [1.0, 0.0])
    await s.insert("k", "id1", [0.0, 1.0])
    rec = await s.get("k", "id1")
    # Guard first so a missing record fails as an assertion rather than an
    # AttributeError on ``rec.vector``.
    assert rec is not None
    assert tuple(rec.vector) == (0.0, 1.0)
@pytest.mark.anyio
async def test_fake_delete() -> None:
    """delete() returns True once, then False, and the record is gone."""
    store = FakeVectorStore()
    await store.initialize()
    await store.insert("k", "id1", [1.0])

    first_attempt = await store.delete("k", "id1")
    assert first_attempt is True
    # Deleting again finds nothing to remove.
    second_attempt = await store.delete("k", "id1")
    assert second_attempt is False

    leftover = await store.get("k", "id1")
    assert leftover is None
@pytest.mark.anyio
async def test_fake_health_reports_counts() -> None:
    """health() reports the number of kinds and of vectors held by the fake."""
    store = FakeVectorStore()
    await store.initialize()

    empty_report = await store.health()
    assert empty_report == {"ok": True, "backend": "fake", "kinds": 0, "vectors": 0}

    # Two vectors under kind "a" and one under kind "b".
    entries = (
        ("a", "1", [1.0]),
        ("a", "2", [2.0]),
        ("b", "1", [3.0, 4.0]),
    )
    for kind, vector_id, vector in entries:
        await store.insert(kind, vector_id, vector)

    populated_report = await store.health()
    assert populated_report["kinds"] == 2
    assert populated_report["vectors"] == 3
@pytest.mark.anyio
async def test_null_store_is_inert() -> None:
    """NullVectorStore accepts every call but never stores or returns data."""
    store = NullVectorStore()
    await store.initialize()
    await store.insert("k", "id", [1.0, 2.0])  # no-op

    fetched = await store.get("k", "id")
    assert fetched is None

    neighbours = await store.knn("k", [1.0, 2.0])
    assert neighbours == []

    removed = await store.delete("k", "id")
    assert removed is False

    report = await store.health()
    assert report["backend"] == "null"
    await store.close()