Lays the storage and bus substrate for the "credential reuse patterns"
task in DEVELOPMENT.md and scaffolds decnet/vectorstore/ as the future
substrate for statistical attacker re-identification over behavioral
fingerprints. No correlator, profiler, API, or dashboard wiring in
this commit — see TODO.md for the handoff.
Schema:
- Credential.attacker_uuid (nullable FK to attackers.uuid),
  backfilled by the profiler post-write to avoid coupling the
  capture path to the profiler's ordering.
- CredentialReuse table — UUID PK, JSON list columns for the
  accumulating attacker_uuids/ips/deckies/services, target_count
  (the discriminative scalar), and confidence, reserved for a
  future fuzzy-credential pass.
Repo:
- upsert_credential_reuse / list_credential_reuses /
  get_credential_reuse_by_id / update_credential_attacker_uuid.
- Renamed the pre-existing get_credential_reuse(secret_sha256) to
  get_credential_attempts_for_secret(secret_sha256) — the new
  findings table needs the cleaner name.
Bus topics:
- credential.captured (one per Credential upsert)
- credential.reuse.detected (correlator-emitted on insert/grow)
Vectorstore subpackage (decnet/vectorstore/, flat layout mirroring
decnet/bus/):
- BaseVectorStore ABC keyed by (kind, id) — the kind discriminator
  means new feature families are additive, with no schema migration.
- FakeVectorStore (in-memory L2 KNN), NullVectorStore (no-op for
  DECNET_VECTORSTORE_ENABLED=false), SqliteVecVectorStore (lazy
  sqlite_vec extension load, one vec0 virtual table per kind).
- get_vectorstore() env-driven dispatch with graceful fallback
  to FakeVectorStore when the sqlite-vec extension isn't on the
  host, so workers don't crash on a missing optional dep.
Tests: 26 new (11 cred-reuse repo, 15 vectorstore). Existing
credentials and base-repo tests updated for the rename. Total: 34
passing on the touched files.
114 lines
3.3 KiB
Python
114 lines
3.3 KiB
Python
"""Tests for :class:`FakeVectorStore` and :class:`NullVectorStore`.

The fake is the reference implementation of the BaseVectorStore
contract — every behavior assertion here doubles as a contract test
that any future backend must satisfy.
"""
|
|
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
from decnet.vectorstore.fake import FakeVectorStore, NullVectorStore
|
|
|
|
|
|
@pytest.mark.anyio
async def test_fake_round_trip() -> None:
    """Insert three vectors under one kind and read the first one back.

    The fetched record must carry the kind, id, dimension and vector
    that were handed to ``insert``.
    """
    store = FakeVectorStore()
    await store.initialize()

    fixtures = (
        ("sess-1", [1.0, 0.0, 0.0]),
        ("sess-2", [0.9, 0.1, 0.0]),
        ("sess-3", [0.0, 1.0, 0.0]),
    )
    for vector_id, vector in fixtures:
        await store.insert("ja3", vector_id, vector)

    fetched = await store.get("ja3", "sess-1")
    assert fetched is not None
    assert (fetched.kind, fetched.id, fetched.dim) == ("ja3", "sess-1", 3)
    assert tuple(fetched.vector) == (1.0, 0.0, 0.0)
|
|
|
|
|
|
@pytest.mark.anyio
async def test_fake_knn_orders_by_distance() -> None:
    """``knn`` returns neighbours sorted nearest-first.

    Three vectors are stored under one kind: one identical to the query,
    one slightly off it, and one orthogonal to it. The ids are named
    after their relationship to the query so the expected ordering reads
    naturally.
    """
    s = FakeVectorStore()
    await s.initialize()
    # Insertion order deliberately differs from the expected result order
    # so the assertion cannot pass by accident of storage order.
    await s.insert("ja3", "exact", [1.0, 0.0])
    await s.insert("ja3", "far", [0.0, 1.0])
    await s.insert("ja3", "near", [0.99, 0.01])

    n = await s.knn("ja3", [1.0, 0.0], k=3)
    assert [x.id for x in n] == ["exact", "near", "far"]
    # The identical vector sits at distance zero; the rest strictly grow.
    assert n[0].distance == 0.0
    assert n[0].distance < n[1].distance < n[2].distance
|
|
|
|
|
|
@pytest.mark.anyio
async def test_fake_knn_unknown_kind_returns_empty() -> None:
    """Querying a kind that never received an insert yields ``[]``."""
    store = FakeVectorStore()
    await store.initialize()

    neighbours = await store.knn("never_seen", [0.1, 0.2])
    assert neighbours == []
|
|
|
|
|
|
@pytest.mark.anyio
async def test_fake_dim_mismatch_raises() -> None:
    """Inserting a 2-d vector into a kind seeded with a 3-d one must fail.

    The store is expected to raise ``ValueError`` whose message matches
    ``"dim mismatch"``.
    """
    store = FakeVectorStore()
    await store.initialize()

    three_d = [1.0, 2.0, 3.0]
    two_d = [1.0, 2.0]
    await store.insert("hassh", "a", three_d)

    with pytest.raises(ValueError, match="dim mismatch"):
        await store.insert("hassh", "b", two_d)
|
|
|
|
|
|
@pytest.mark.anyio
async def test_fake_knn_query_dim_mismatch_raises() -> None:
    """A KNN query whose length differs from the stored vectors must fail.

    Only the exception type (``ValueError``) is pinned here, not its text.
    """
    store = FakeVectorStore()
    await store.initialize()

    stored = [0.1, 0.2, 0.3]
    short_query = [0.1, 0.2]
    await store.insert("kd", "a", stored)

    with pytest.raises(ValueError):
        await store.knn("kd", short_query)
|
|
|
|
|
|
@pytest.mark.anyio
async def test_fake_replace_existing_id() -> None:
    """A second insert under the same (kind, id) overwrites the first.

    After inserting two different vectors with identical keys, ``get``
    must return the most recently inserted vector.
    """
    s = FakeVectorStore()
    await s.initialize()
    await s.insert("k", "id1", [1.0, 0.0])
    await s.insert("k", "id1", [0.0, 1.0])

    rec = await s.get("k", "id1")
    # Guard first so a missing record fails as a clear assertion rather
    # than an AttributeError on ``None.vector``.
    assert rec is not None
    assert tuple(rec.vector) == (0.0, 1.0)
|
|
|
|
|
|
@pytest.mark.anyio
async def test_fake_delete() -> None:
    """``delete`` reports ``True`` once, then ``False``, and the record is gone."""
    store = FakeVectorStore()
    await store.initialize()
    await store.insert("k", "id1", [1.0])

    first_attempt = await store.delete("k", "id1")
    assert first_attempt is True

    # Deleting the same key again finds nothing to remove.
    second_attempt = await store.delete("k", "id1")
    assert second_attempt is False

    leftover = await store.get("k", "id1")
    assert leftover is None
|
|
|
|
|
|
@pytest.mark.anyio
async def test_fake_health_reports_counts() -> None:
    """``health`` exposes the backend name plus kind and vector counts.

    An empty store reports zero for both counters; after three inserts
    spread over two kinds it reports ``kinds == 2`` and ``vectors == 3``.
    """
    store = FakeVectorStore()
    await store.initialize()

    empty_report = await store.health()
    assert empty_report == {"ok": True, "backend": "fake", "kinds": 0, "vectors": 0}

    inserts = (
        ("a", "1", [1.0]),
        ("a", "2", [2.0]),
        ("b", "1", [3.0, 4.0]),
    )
    for kind, vector_id, vector in inserts:
        await store.insert(kind, vector_id, vector)

    populated_report = await store.health()
    assert populated_report["kinds"] == 2
    assert populated_report["vectors"] == 3
|
|
|
|
|
|
@pytest.mark.anyio
async def test_null_store_is_inert() -> None:
    """``NullVectorStore`` accepts every call but never retains anything.

    After an insert, ``get`` returns ``None``, ``knn`` returns ``[]`` and
    ``delete`` returns ``False``; ``health`` identifies the backend as
    ``"null"``.
    """
    store = NullVectorStore()
    await store.initialize()
    await store.insert("k", "id", [1.0, 2.0])  # no-op

    looked_up = await store.get("k", "id")
    assert looked_up is None

    neighbours = await store.knn("k", [1.0, 2.0])
    assert neighbours == []

    deleted = await store.delete("k", "id")
    assert deleted is False

    report = await store.health()
    assert report["backend"] == "null"

    await store.close()
|