feat(creds): cred-reuse foundation + vectorstore scaffold
Lays the storage and bus substrate for the "credential reuse patterns"
task in DEVELOPMENT.md and scaffolds decnet/vectorstore/ as the future
substrate for statistical attacker re-identification over behavioral
fingerprints. No correlator, profiler, API, or dashboard wiring in
this commit — see TODO.md for the handoff.
Schema:
- Credential.attacker_uuid (nullable FK to attackers.uuid),
backfilled by the profiler post-write to avoid coupling the
capture path to the profiler's ordering.
- CredentialReuse table — UUID PK, JSON list columns for the
accumulating attacker_uuids/ips/deckies/services, target_count
(the discriminative scalar), confidence reserved for a future
fuzzy-credential pass.
Repo:
- upsert_credential_reuse / list_credential_reuses /
get_credential_reuse_by_id / update_credential_attacker_uuid.
- Renamed pre-existing get_credential_reuse(secret_sha256) to
get_credential_attempts_for_secret(secret_sha256) — the new
findings table needs the cleaner name.
Bus topics:
- credential.captured (one per Credential upsert)
- credential.reuse.detected (correlator-emitted on insert/grow)
Vectorstore subpackage (decnet/vectorstore/, flat layout mirroring
decnet/bus/):
- BaseVectorStore ABC keyed by (kind, id) — kind discriminator
means new feature families are additive, no schema migration.
- FakeVectorStore (in-memory L2 KNN), NullVectorStore (no-op for
DECNET_VECTORSTORE_ENABLED=false), SqliteVecVectorStore (lazy
sqlite_vec extension load, one vec0 virtual table per kind).
- get_vectorstore() env-driven dispatch with graceful fallback
to FakeVectorStore when the sqlite-vec extension isn't on the
host, so workers don't crash on a missing optional dep.
Tests: 26 new (11 cred-reuse repo, 15 vectorstore). Existing
credentials and base-repo tests updated for the rename. Total: 34
passing on the touched files.
This commit is contained in:
@@ -49,6 +49,8 @@ from .logs import (
|
||||
Bounty,
|
||||
BountyResponse,
|
||||
Credential,
|
||||
CredentialReuse,
|
||||
CredentialReuseResponse,
|
||||
CredentialsResponse,
|
||||
Log,
|
||||
LogsResponse,
|
||||
@@ -170,6 +172,8 @@ __all__ = [
|
||||
"Bounty",
|
||||
"BountyResponse",
|
||||
"Credential",
|
||||
"CredentialReuse",
|
||||
"CredentialReuseResponse",
|
||||
"CredentialsResponse",
|
||||
"Log",
|
||||
"LogsResponse",
|
||||
|
||||
@@ -3,7 +3,7 @@ from datetime import datetime, timezone
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import Column, Index, Text
|
||||
from sqlalchemy import Column, Index, Text, UniqueConstraint
|
||||
from sqlmodel import Field, SQLModel
|
||||
|
||||
from ._base import _BIG_TEXT
|
||||
@@ -54,9 +54,13 @@ class Credential(SQLModel, table=True):
|
||||
LDAP. Nullable for principal-less mechanisms (Redis AUTH, bearer
|
||||
tokens). Fully service-specific keys ride in ``fields`` JSON.
|
||||
|
||||
Dedup contract: same (attacker_uuid, decky, service, secret_sha256,
|
||||
Dedup contract: same (attacker_ip, decky, service, secret_sha256,
|
||||
principal_or_empty) tuple → upsert, bumps ``attempt_count`` and
|
||||
``last_seen``. Different secret or different principal → new row.
|
||||
|
||||
``attacker_uuid`` is backfilled by the profiler once an Attacker row
|
||||
has been minted for the source IP. It is nullable on first write so
|
||||
the credential ingest path stays decoupled from the profiler.
|
||||
"""
|
||||
__tablename__ = "credentials"
|
||||
__table_args__ = (
|
||||
@@ -64,11 +68,15 @@ class Credential(SQLModel, table=True):
|
||||
Index("ix_credentials_principal_service", "principal", "service"),
|
||||
)
|
||||
id: Optional[int] = Field(default=None, primary_key=True)
|
||||
# Keyed by attacker IP (not attackers.uuid) to match Bounty's pattern
|
||||
# and avoid the chicken-and-egg of writing a credential row before
|
||||
# the profiler has minted the Attacker. Index covers the join path
|
||||
# cred_reuse → Attacker.ip.
|
||||
# Keyed by attacker IP (not attackers.uuid) on the write path to
|
||||
# avoid the chicken-and-egg of landing a credential before the
|
||||
# profiler has minted the Attacker. The profiler backfills
|
||||
# ``attacker_uuid`` once it knows the IP, so cross-IP reuse queries
|
||||
# eventually have an indexed FK to traverse.
|
||||
attacker_ip: str = Field(index=True)
|
||||
attacker_uuid: Optional[str] = Field(
|
||||
default=None, foreign_key="attackers.uuid", index=True
|
||||
)
|
||||
decky_name: str = Field(index=True)
|
||||
service: str = Field(index=True)
|
||||
principal: Optional[str] = Field(default=None, index=True, max_length=256)
|
||||
@@ -107,6 +115,77 @@ class Credential(SQLModel, table=True):
|
||||
attempt_count: int = Field(default=1)
|
||||
|
||||
|
||||
class CredentialReuse(SQLModel, table=True):
|
||||
"""One observed credential reuse pattern across deckies and/or services.
|
||||
|
||||
A row here is a *finding* produced by the correlator: the same
|
||||
``(secret_sha256, secret_kind, principal)`` tuple was observed
|
||||
against ``target_count`` distinct decky×service pairs. Upserted on
|
||||
that natural key — the row accumulates new deckies/services/IPs
|
||||
over time as the credential is reused.
|
||||
|
||||
The ``confidence`` column is reserved for a future fuzzy-match pass
|
||||
(credential variants, e.g. ``hunter2`` vs ``hunter22``); rows
|
||||
written by the exact-secret correlator are always 1.0.
|
||||
"""
|
||||
__tablename__ = "credential_reuse"
|
||||
__table_args__ = (
|
||||
UniqueConstraint(
|
||||
"secret_sha256", "secret_kind", "principal_key",
|
||||
name="uq_credential_reuse_secret_principal",
|
||||
),
|
||||
)
|
||||
id: str = Field(primary_key=True, max_length=36)
|
||||
secret_sha256: str = Field(index=True, max_length=64)
|
||||
secret_kind: str = Field(index=True, max_length=32)
|
||||
# Optional human-readable principal (e.g. "root"). Nullable — for
|
||||
# cross-principal reuse rows we leave this null, but we still need
|
||||
# a unique constraint, so ``principal_key`` is the non-null
|
||||
# canonicalised form ("" when principal is null) used in the
|
||||
# uniqueness tuple. SQLite's NULLs-distinct-in-UNIQUE behaviour
|
||||
# would otherwise let duplicate null-principal rows through.
|
||||
principal: Optional[str] = Field(default=None, max_length=256)
|
||||
principal_key: str = Field(default="", max_length=256)
|
||||
attacker_uuids: str = Field(
|
||||
default="[]",
|
||||
sa_column=Column("attacker_uuids", _BIG_TEXT, nullable=False, default="[]"),
|
||||
) # JSON list[str]
|
||||
attacker_ips: str = Field(
|
||||
default="[]",
|
||||
sa_column=Column("attacker_ips", _BIG_TEXT, nullable=False, default="[]"),
|
||||
) # JSON list[str]
|
||||
deckies: str = Field(
|
||||
default="[]",
|
||||
sa_column=Column("deckies", _BIG_TEXT, nullable=False, default="[]"),
|
||||
) # JSON list[str]
|
||||
services: str = Field(
|
||||
default="[]",
|
||||
sa_column=Column("services", _BIG_TEXT, nullable=False, default="[]"),
|
||||
) # JSON list[str]
|
||||
# COUNT(DISTINCT decky||':'||service). The discriminative scalar
|
||||
# for ranking and filtering — a credential seen on 12 targets is
|
||||
# far more interesting than one seen on 2.
|
||||
target_count: int = Field(default=0, index=True)
|
||||
attempt_count: int = Field(default=0)
|
||||
confidence: float = Field(default=1.0)
|
||||
first_seen: datetime = Field(
|
||||
default_factory=lambda: datetime.now(timezone.utc), index=True
|
||||
)
|
||||
last_seen: datetime = Field(
|
||||
default_factory=lambda: datetime.now(timezone.utc), index=True
|
||||
)
|
||||
updated_at: datetime = Field(
|
||||
default_factory=lambda: datetime.now(timezone.utc), index=True
|
||||
)
|
||||
|
||||
|
||||
class CredentialReuseResponse(BaseModel):
|
||||
total: int
|
||||
limit: int
|
||||
offset: int
|
||||
data: List[dict[str, Any]]
|
||||
|
||||
|
||||
class State(SQLModel, table=True):
|
||||
__tablename__ = "state"
|
||||
key: str = Field(primary_key=True)
|
||||
|
||||
Reference in New Issue
Block a user