Files
DECNET/tests/vectorstore/test_fake.py
anti ce4be68501 feat(creds): cred-reuse foundation + vectorstore scaffold
Lays the storage and bus substrate for the "credential reuse patterns"
task in DEVELOPMENT.md and scaffolds decnet/vectorstore/ as the future
substrate for statistical attacker re-identification over behavioral
fingerprints. No correlator, profiler, API, or dashboard wiring in
this commit — see TODO.md for the handoff.

Schema:
  - Credential.attacker_uuid (nullable FK to attackers.uuid),
    backfilled by the profiler post-write to avoid coupling the
    capture path to the profiler's ordering.
  - CredentialReuse table — UUID PK, JSON list columns for the
    accumulating attacker_uuids/ips/deckies/services, target_count
    (the discriminative scalar), confidence reserved for a future
    fuzzy-credential pass.

Repo:
  - upsert_credential_reuse / list_credential_reuses /
    get_credential_reuse_by_id / update_credential_attacker_uuid.
  - Renamed pre-existing get_credential_reuse(secret_sha256) to
    get_credential_attempts_for_secret(secret_sha256) — the new
    findings table needs the cleaner name.

Bus topics:
  - credential.captured (one per Credential upsert)
  - credential.reuse.detected (correlator-emitted on insert/grow)

Vectorstore subpackage (decnet/vectorstore/, flat layout mirroring
decnet/bus/):
  - BaseVectorStore ABC keyed by (kind, id) — kind discriminator
    means new feature families are additive, no schema migration.
  - FakeVectorStore (in-memory L2 KNN), NullVectorStore (no-op for
    DECNET_VECTORSTORE_ENABLED=false), SqliteVecVectorStore (lazy
    sqlite_vec extension load, one vec0 virtual table per kind).
  - get_vectorstore() env-driven dispatch with graceful fallback
    to FakeVectorStore when the sqlite-vec extension isn't on the
    host, so workers don't crash on a missing optional dep.

Tests: 26 new (11 cred-reuse repo, 15 vectorstore). Existing
credentials and base-repo tests updated for the rename. Total: 34
passing on the touched files.
2026-04-26 03:18:34 -04:00

114 lines
3.3 KiB
Python

"""Tests for :class:`FakeVectorStore` and :class:`NullVectorStore`.
The fake is the reference implementation of the BaseVectorStore
contract — every behavior assertion here doubles as a contract test
that any future backend must satisfy.
"""
from __future__ import annotations
import pytest
from decnet.vectorstore.fake import FakeVectorStore, NullVectorStore
@pytest.mark.anyio
async def test_fake_round_trip() -> None:
    """A stored vector can be fetched back with its kind, id, dim and values."""
    store = FakeVectorStore()
    await store.initialize()

    fixtures = (
        ("sess-1", [1.0, 0.0, 0.0]),
        ("sess-2", [0.9, 0.1, 0.0]),
        ("sess-3", [0.0, 1.0, 0.0]),
    )
    for ident, vec in fixtures:
        await store.insert("ja3", ident, vec)

    fetched = await store.get("ja3", "sess-1")

    # Guard first so a missing record fails as an assertion, not an
    # AttributeError on the field checks below.
    assert fetched is not None
    assert fetched.kind == "ja3"
    assert fetched.id == "sess-1"
    assert fetched.dim == 3
    assert tuple(fetched.vector) == (1.0, 0.0, 0.0)
@pytest.mark.anyio
async def test_fake_knn_orders_by_distance() -> None:
    """KNN results come back sorted from closest to farthest."""
    store = FakeVectorStore()
    await store.initialize()

    # NOTE: the labels read backwards -- "near" is identical to the query
    # vector (distance 0), while "exact" is the slightly-off neighbour.
    corpus = {
        "near": [1.0, 0.0],
        "far": [0.0, 1.0],
        "exact": [0.99, 0.01],
    }
    for ident, vec in corpus.items():
        await store.insert("ja3", ident, vec)

    hits = await store.knn("ja3", [1.0, 0.0], k=3)

    ranked_ids = [hit.id for hit in hits]
    assert ranked_ids == ["near", "exact", "far"]

    closest, runner_up, farthest = hits
    assert closest.distance == 0.0
    assert farthest.distance > runner_up.distance
@pytest.mark.anyio
async def test_fake_knn_unknown_kind_returns_empty() -> None:
    """Querying a kind that was never inserted yields an empty list."""
    store = FakeVectorStore()
    await store.initialize()

    hits = await store.knn("never_seen", [0.1, 0.2])

    assert hits == []
@pytest.mark.anyio
async def test_fake_dim_mismatch_raises() -> None:
    """Inserting a shorter vector into an existing kind must raise ValueError."""
    store = FakeVectorStore()
    await store.initialize()

    three_dim = [1.0, 2.0, 3.0]
    two_dim = [1.0, 2.0]

    # The first insert is the one that fixes the expected length for "hassh".
    await store.insert("hassh", "a", three_dim)

    with pytest.raises(ValueError, match="dim mismatch"):
        await store.insert("hassh", "b", two_dim)
@pytest.mark.anyio
async def test_fake_knn_query_dim_mismatch_raises() -> None:
    """A KNN query whose length differs from the stored vectors raises ValueError."""
    store = FakeVectorStore()
    await store.initialize()

    stored_vec = [0.1, 0.2, 0.3]
    short_query = [0.1, 0.2]
    await store.insert("kd", "a", stored_vec)

    with pytest.raises(ValueError):
        await store.knn("kd", short_query)
@pytest.mark.anyio
async def test_fake_replace_existing_id() -> None:
    """Re-inserting under an existing (kind, id) overwrites the stored vector."""
    s = FakeVectorStore()
    await s.initialize()
    await s.insert("k", "id1", [1.0, 0.0])
    await s.insert("k", "id1", [0.0, 1.0])
    rec = await s.get("k", "id1")
    # get() can return None (see the delete test); guard explicitly so a
    # regression fails as a clear assertion rather than an AttributeError,
    # and so type checkers can narrow the optional before `.vector`.
    assert rec is not None
    assert tuple(rec.vector) == (0.0, 1.0)
@pytest.mark.anyio
async def test_fake_delete() -> None:
    """delete() returns True once, then False, and the record is gone."""
    store = FakeVectorStore()
    await store.initialize()
    await store.insert("k", "id1", [1.0])

    first_attempt = await store.delete("k", "id1")
    assert first_attempt is True

    # Second delete of the same key: nothing left to remove.
    second_attempt = await store.delete("k", "id1")
    assert second_attempt is False

    leftover = await store.get("k", "id1")
    assert leftover is None
@pytest.mark.anyio
async def test_fake_health_reports_counts() -> None:
    """health() reports the number of kinds and vectors before and after inserts."""
    store = FakeVectorStore()
    await store.initialize()

    empty_report = await store.health()
    assert empty_report == {"ok": True, "backend": "fake", "kinds": 0, "vectors": 0}

    # Two kinds ("a" and "b"), three vectors in total.
    rows = (
        ("a", "1", [1.0]),
        ("a", "2", [2.0]),
        ("b", "1", [3.0, 4.0]),
    )
    for kind, ident, vec in rows:
        await store.insert(kind, ident, vec)

    populated_report = await store.health()
    assert populated_report["kinds"] == 2
    assert populated_report["vectors"] == 3
@pytest.mark.anyio
async def test_null_store_is_inert() -> None:
    """NullVectorStore accepts every call but never stores or returns data."""
    store = NullVectorStore()
    await store.initialize()

    # The insert is expected to be a no-op; the reads below confirm
    # that nothing was retained.
    await store.insert("k", "id", [1.0, 2.0])

    fetched = await store.get("k", "id")
    assert fetched is None

    neighbours = await store.knn("k", [1.0, 2.0])
    assert neighbours == []

    removed = await store.delete("k", "id")
    assert removed is False

    report = await store.health()
    assert report["backend"] == "null"

    await store.close()