Files
DECNET/decnet/vectorstore/base.py
anti f2b3393669 chore: relicense to AGPL-3.0-or-later and add SPDX headers
Replaces LICENSE (GPLv3 -> AGPLv3) and prepends
`SPDX-License-Identifier: AGPL-3.0-or-later` to every source file
across decnet/, decnet_web/, tests/, scripts/, and tools/.

Rationale: closes the GPLv3 ASP loophole so any party operating a
modified DECNET as a network service must offer their modified
source. Personal copyright (Samuel Paschuan) + inbound=outbound
contributions make a future unilateral relicense infeasible.

- LICENSE: full AGPL-3.0 text (gnu.org/licenses/agpl-3.0.txt)
- COPYRIGHT: project copyright notice
- tools/add_spdx_headers.py: idempotent header injector
  (shebang- and PEP 263-aware)

Touches 1565 source files (.py, .ts, .tsx, .js, .jsx, .css, .sh).
No behavior change; comments only.
2026-05-22 21:04:16 -04:00

116 lines
3.9 KiB
Python

# SPDX-License-Identifier: AGPL-3.0-or-later
"""Vector-store abstractions: :class:`BaseVectorStore` ABC + record types.
Every backend (sqlite-vec, in-memory fake, future pgvector / Qdrant)
speaks this contract. The store is keyed by ``(kind, id)`` where:
* ``kind`` is a short discriminator (``ja3``, ``hassh``,
``keystroke_dwell``, ``cmd_ngram``, ...) — vectors are only ever
compared **within the same kind**, so adding a new feature family is
a non-event for the store.
* ``id`` is a stable identifier owned by the caller — typically the
``session_id`` or ``attacker_uuid``. The store does not interpret it.
* ``extractor_version`` is recorded alongside the vector so v1 vs v2 of
the same kind never get cross-compared by accident — a similarity
scorer that respects versioning is the consumer's responsibility, but
the data it needs is here.
The contract is intentionally minimal (insert/get/knn/delete/health) so
backends with different physical layouts can implement it
straightforwardly. No batch APIs in v1 — sub-millisecond per-vector
overhead at honeypot scales (≤ 100k vectors per kind) makes batching
unnecessary, and the loop-over-singles pattern keeps the contract small.
"""
from __future__ import annotations
import abc
from dataclasses import dataclass
from typing import Optional, Sequence
# Bumped when the wire/ABI shape of records changes incompatibly.
# Backends MAY refuse to load older data when this changes, but the
# pre-v1 expectation is to migrate forward in the same release.
VECTORSTORE_SCHEMA_VERSION = 1
@dataclass(frozen=True)
class VectorRecord:
"""One stored vector, returned by :meth:`BaseVectorStore.get`."""
kind: str
id: str
vector: Sequence[float]
dim: int
extractor_version: int = 1
@dataclass(frozen=True)
class Neighbor:
"""One similarity-search hit, returned by :meth:`BaseVectorStore.knn`.
``distance`` is whatever the backend's native metric reports —
cosine distance for sqlite-vec's default index, L2 for the in-memory
fake. Smaller is more similar in both cases. Consumers that need
a uniform metric should configure the backend explicitly.
"""
kind: str
id: str
distance: float
class BaseVectorStore(abc.ABC):
"""Async interface for a kind-discriminated vector store.
Implementations MAY be transactional (sqlite) or not (pure
in-memory). All methods are async to match the rest of the DECNET
storage layer; trivial backends can ``await`` no-op coroutines.
"""
@abc.abstractmethod
async def initialize(self) -> None:
"""One-shot setup (open files, load extensions, create tables)."""
@abc.abstractmethod
async def close(self) -> None:
"""Release resources. Idempotent."""
@abc.abstractmethod
async def health(self) -> dict:
"""Liveness + capability probe.
Returns a dict like ``{"ok": True, "backend": "sqlite_vec",
"kinds": 4, "vectors": 12_345}``. Used by ``/api/v1/health`` and
diagnostics; never raises — backends that can't determine a
field set it to None.
"""
@abc.abstractmethod
async def insert(
self,
kind: str,
id: str,
vector: Sequence[float],
*,
extractor_version: int = 1,
) -> None:
"""Insert or replace ``(kind, id)``. Vector dim is fixed per kind
the first time a kind is seen; mismatched dims raise.
"""
@abc.abstractmethod
async def get(self, kind: str, id: str) -> Optional[VectorRecord]:
"""Fetch one record, or None if absent."""
@abc.abstractmethod
async def delete(self, kind: str, id: str) -> bool:
"""Delete one record. Returns True if a row was removed."""
@abc.abstractmethod
async def knn(
self, kind: str, vector: Sequence[float], k: int = 10
) -> list[Neighbor]:
"""Return up to *k* nearest neighbors of ``vector`` within
``kind``. Empty list if the kind is unknown or empty.
"""