feat(creds): cred-reuse foundation + vectorstore scaffold
Lays the storage and bus substrate for the "credential reuse patterns"
task in DEVELOPMENT.md and scaffolds decnet/vectorstore/ as the future
substrate for statistical attacker re-identification over behavioral
fingerprints. No correlator, profiler, API, or dashboard wiring in
this commit — see TODO.md for the handoff.
Schema:
- Credential.attacker_uuid (nullable FK to attackers.uuid),
backfilled by the profiler post-write to avoid coupling the
capture path to the profiler's ordering.
- CredentialReuse table — UUID PK, JSON list columns for the
accumulating attacker_uuids/ips/deckies/services, target_count
(the discriminative scalar), confidence reserved for a future
fuzzy-credential pass.
Repo:
- upsert_credential_reuse / list_credential_reuses /
get_credential_reuse_by_id / update_credential_attacker_uuid.
- Renamed pre-existing get_credential_reuse(secret_sha256) to
get_credential_attempts_for_secret(secret_sha256) — the new
findings table needs the cleaner name.
Bus topics:
- credential.captured (one per Credential upsert)
- credential.reuse.detected (correlator-emitted on insert/grow)
Vectorstore subpackage (decnet/vectorstore/, flat layout mirroring
decnet/bus/):
- BaseVectorStore ABC keyed by (kind, id) — kind discriminator
means new feature families are additive, no schema migration.
- FakeVectorStore (in-memory L2 KNN), NullVectorStore (no-op for
DECNET_VECTORSTORE_ENABLED=false), SqliteVecVectorStore (lazy
sqlite_vec extension load, one vec0 virtual table per kind).
- get_vectorstore() env-driven dispatch with graceful fallback
to FakeVectorStore when the sqlite-vec extension isn't on the
host, so workers don't crash on a missing optional dep.
Tests: 26 new (11 cred-reuse repo, 15 vectorstore). Existing
credentials and base-repo tests updated for the rename. Total: 34
passing on the touched files.
This commit is contained in:
@@ -49,6 +49,8 @@ from .logs import (
|
||||
Bounty,
|
||||
BountyResponse,
|
||||
Credential,
|
||||
CredentialReuse,
|
||||
CredentialReuseResponse,
|
||||
CredentialsResponse,
|
||||
Log,
|
||||
LogsResponse,
|
||||
@@ -170,6 +172,8 @@ __all__ = [
|
||||
"Bounty",
|
||||
"BountyResponse",
|
||||
"Credential",
|
||||
"CredentialReuse",
|
||||
"CredentialReuseResponse",
|
||||
"CredentialsResponse",
|
||||
"Log",
|
||||
"LogsResponse",
|
||||
|
||||
@@ -3,7 +3,7 @@ from datetime import datetime, timezone
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import Column, Index, Text
|
||||
from sqlalchemy import Column, Index, Text, UniqueConstraint
|
||||
from sqlmodel import Field, SQLModel
|
||||
|
||||
from ._base import _BIG_TEXT
|
||||
@@ -54,9 +54,13 @@ class Credential(SQLModel, table=True):
|
||||
LDAP. Nullable for principal-less mechanisms (Redis AUTH, bearer
|
||||
tokens). Fully service-specific keys ride in ``fields`` JSON.
|
||||
|
||||
Dedup contract: same (attacker_uuid, decky, service, secret_sha256,
|
||||
Dedup contract: same (attacker_ip, decky, service, secret_sha256,
|
||||
principal_or_empty) tuple → upsert, bumps ``attempt_count`` and
|
||||
``last_seen``. Different secret or different principal → new row.
|
||||
|
||||
``attacker_uuid`` is backfilled by the profiler once an Attacker row
|
||||
has been minted for the source IP. It is nullable on first write so
|
||||
the credential ingest path stays decoupled from the profiler.
|
||||
"""
|
||||
__tablename__ = "credentials"
|
||||
__table_args__ = (
|
||||
@@ -64,11 +68,15 @@ class Credential(SQLModel, table=True):
|
||||
Index("ix_credentials_principal_service", "principal", "service"),
|
||||
)
|
||||
id: Optional[int] = Field(default=None, primary_key=True)
|
||||
# Keyed by attacker IP (not attackers.uuid) to match Bounty's pattern
|
||||
# and avoid the chicken-and-egg of writing a credential row before
|
||||
# the profiler has minted the Attacker. Index covers the join path
|
||||
# cred_reuse → Attacker.ip.
|
||||
# Keyed by attacker IP (not attackers.uuid) on the write path to
|
||||
# avoid the chicken-and-egg of landing a credential before the
|
||||
# profiler has minted the Attacker. The profiler backfills
|
||||
# ``attacker_uuid`` once it knows the IP, so cross-IP reuse queries
|
||||
# eventually have an indexed FK to traverse.
|
||||
attacker_ip: str = Field(index=True)
|
||||
attacker_uuid: Optional[str] = Field(
|
||||
default=None, foreign_key="attackers.uuid", index=True
|
||||
)
|
||||
decky_name: str = Field(index=True)
|
||||
service: str = Field(index=True)
|
||||
principal: Optional[str] = Field(default=None, index=True, max_length=256)
|
||||
@@ -107,6 +115,77 @@ class Credential(SQLModel, table=True):
|
||||
attempt_count: int = Field(default=1)
|
||||
|
||||
|
||||
class CredentialReuse(SQLModel, table=True):
    """One observed credential reuse pattern across deckies and/or services.

    A row here is a *finding* produced by the correlator: the same
    ``(secret_sha256, secret_kind, principal)`` tuple was observed
    against ``target_count`` distinct decky×service pairs. Upserted on
    that natural key — the row accumulates new deckies/services/IPs
    over time as the credential is reused.

    The ``attacker_uuids``, ``attacker_ips``, ``deckies`` and
    ``services`` columns are stored as JSON-encoded ``list[str]`` text,
    so readers must ``json.loads`` them before treating them as lists.

    The ``confidence`` column is reserved for a future fuzzy-match pass
    (credential variants, e.g. ``hunter2`` vs ``hunter22``); rows
    written by the exact-secret correlator are always 1.0.
    """
    __tablename__ = "credential_reuse"
    __table_args__ = (
        # Natural key of a finding. Uses ``principal_key`` rather than
        # the nullable ``principal`` so null-principal rows still collide.
        UniqueConstraint(
            "secret_sha256", "secret_kind", "principal_key",
            name="uq_credential_reuse_secret_principal",
        ),
    )
    # Surrogate primary key, sized for a 36-character UUID string.
    id: str = Field(primary_key=True, max_length=36)
    # Hash of the reused secret — presumably a 64-char hex SHA-256
    # digest; confirm against the capture path.
    secret_sha256: str = Field(index=True, max_length=64)
    secret_kind: str = Field(index=True, max_length=32)
    # Optional human-readable principal (e.g. "root"). Nullable — for
    # cross-principal reuse rows we leave this null, but we still need
    # a unique constraint, so ``principal_key`` is the non-null
    # canonicalised form ("" when principal is null) used in the
    # uniqueness tuple. SQLite's NULLs-distinct-in-UNIQUE behaviour
    # would otherwise let duplicate null-principal rows through.
    principal: Optional[str] = Field(default=None, max_length=256)
    principal_key: str = Field(default="", max_length=256)
    # The four accumulating list columns below are JSON text with an
    # empty-list default, declared through ``sa_column`` so they use the
    # project's ``_BIG_TEXT`` column type and are NOT NULL.
    attacker_uuids: str = Field(
        default="[]",
        sa_column=Column("attacker_uuids", _BIG_TEXT, nullable=False, default="[]"),
    )  # JSON list[str]
    attacker_ips: str = Field(
        default="[]",
        sa_column=Column("attacker_ips", _BIG_TEXT, nullable=False, default="[]"),
    )  # JSON list[str]
    deckies: str = Field(
        default="[]",
        sa_column=Column("deckies", _BIG_TEXT, nullable=False, default="[]"),
    )  # JSON list[str]
    services: str = Field(
        default="[]",
        sa_column=Column("services", _BIG_TEXT, nullable=False, default="[]"),
    )  # JSON list[str]
    # COUNT(DISTINCT decky||':'||service). The discriminative scalar
    # for ranking and filtering — a credential seen on 12 targets is
    # far more interesting than one seen on 2.
    target_count: int = Field(default=0, index=True)
    # Attempt total for this finding; defaults to 0 until a writer sets it.
    attempt_count: int = Field(default=0)
    # Reserved for the fuzzy-match pass; exact matches stay at 1.0.
    confidence: float = Field(default=1.0)
    # All three timestamps default to the current UTC time when the
    # model instance is constructed without an explicit value.
    first_seen: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc), index=True
    )
    last_seen: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc), index=True
    )
    updated_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc), index=True
    )
|
||||
|
||||
|
||||
class CredentialReuseResponse(BaseModel):
    """Paged response envelope for credential-reuse findings.

    NOTE(review): the field names mirror the ``limit`` / ``offset``
    arguments and ``(total, rows)`` result of ``list_credential_reuses``;
    no caller is visible here, so confirm that is the intended pairing.
    """
    # Number of findings matching the query, independent of paging.
    total: int
    # Page size and starting offset that produced ``data``.
    limit: int
    offset: int
    # One plain dict per finding on this page.
    data: List[dict[str, Any]]
|
||||
|
||||
|
||||
class State(SQLModel, table=True):
|
||||
__tablename__ = "state"
|
||||
key: str = Field(primary_key=True)
|
||||
|
||||
@@ -153,12 +153,59 @@ class BaseRepository(ABC):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_credential_reuse(
|
||||
async def get_credential_attempts_for_secret(
|
||||
self, secret_sha256: str
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Every (attacker, decky, service, principal) row sharing this secret hash."""
|
||||
pass
|
||||
|
||||
@abstractmethod
async def upsert_credential_reuse(
    self,
    *,
    secret_sha256: str,
    secret_kind: str,
    principal: Optional[str],
    attacker_uuid: Optional[str],
    attacker_ip: str,
    decky: str,
    service: str,
    attempt_count: int,
    ts: Optional[Any] = None,
) -> Optional[dict[str, Any]]:
    """Upsert one credential-reuse finding.

    All arguments are keyword-only. ``secret_sha256``, ``secret_kind``
    and ``principal`` identify the finding; ``attacker_uuid``,
    ``attacker_ip``, ``decky`` and ``service`` describe the observation
    being folded into it; ``attempt_count`` is the number of attempts
    this observation contributes; ``ts`` optionally overrides the
    observation timestamp.

    Returns the row dict (with ``inserted: bool`` mixed in) on
    insert/update, or None if the row is below the reuse threshold and
    shouldn't be persisted yet.

    NOTE(review): the SQLModel implementation in this change also mixes
    in ``changed: bool`` and never returns None — confirm which of the
    two contracts callers should rely on.
    """
    pass
|
||||
|
||||
@abstractmethod
async def list_credential_reuses(
    self,
    limit: int = 50,
    offset: int = 0,
    min_target_count: int = 2,
    secret_kind: Optional[str] = None,
) -> tuple[int, list[dict[str, Any]]]:
    """Paged list of credential-reuse findings ordered by target_count desc.

    ``limit`` and ``offset`` select the page, ``min_target_count`` is
    the lowest ``target_count`` a finding may have to be listed, and
    ``secret_kind`` optionally narrows the list to one kind of secret.

    Returns ``(total, rows)``: the number of matching findings and the
    requested page of row dicts.
    """
    pass
|
||||
|
||||
@abstractmethod
async def get_credential_reuse_by_id(
    self, reuse_id: str
) -> Optional[dict[str, Any]]:
    """One credential-reuse finding by UUID, or None.

    ``reuse_id`` is the finding's primary key; the result is the row as
    a plain dict, or None when no finding has that id.
    """
    pass
|
||||
|
||||
@abstractmethod
async def update_credential_attacker_uuid(
    self, attacker_ip: str, attacker_uuid: str
) -> int:
    """Backfill ``attacker_uuid`` on every Credential row matching the IP
    whose ``attacker_uuid`` is currently null. Returns rows updated.

    Rows that already carry an ``attacker_uuid`` are left untouched.
    """
    pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_state(self, key: str) -> Optional[dict[str, Any]]:
|
||||
"""Retrieve a specific state entry by key."""
|
||||
|
||||
@@ -32,6 +32,7 @@ from decnet.web.db.models import (
|
||||
Log,
|
||||
Bounty,
|
||||
Credential,
|
||||
CredentialReuse,
|
||||
State,
|
||||
Attacker,
|
||||
AttackerBehavior,
|
||||
@@ -684,7 +685,7 @@ class SQLModelRepository(BaseRepository):
|
||||
out.append(d)
|
||||
return out
|
||||
|
||||
async def get_credential_reuse(
|
||||
async def get_credential_attempts_for_secret(
|
||||
self, secret_sha256: str
|
||||
) -> List[dict[str, Any]]:
|
||||
"""Every (attacker_ip, decky, service, principal) row sharing this
|
||||
@@ -706,6 +707,197 @@ class SQLModelRepository(BaseRepository):
|
||||
out.append(d)
|
||||
return out
|
||||
|
||||
# ─── credential reuse (findings) ──────────────────────────────────────
|
||||
|
||||
async def update_credential_attacker_uuid(
    self, attacker_ip: str, attacker_uuid: str
) -> int:
    """Stamp ``attacker_uuid`` onto the credentials captured from one IP.

    Only Credential rows for ``attacker_ip`` whose ``attacker_uuid`` is
    still null are written. The profiler calls this after it
    mints/updates an Attacker row.

    Returns the number of rows the UPDATE reported as affected (0 when
    the driver reports nothing).
    """
    backfill = (
        update(Credential)
        .where(Credential.attacker_ip == attacker_ip)
        # Never overwrite a UUID that has already been assigned.
        .where(Credential.attacker_uuid.is_(None))
        .values(attacker_uuid=attacker_uuid)
    )
    async with self._session() as session:
        outcome = await session.execute(backfill)
        await session.commit()
        touched = outcome.rowcount
        return int(touched) if touched else 0
|
||||
|
||||
@staticmethod
def _merge_unique(existing_json: str, value: Optional[str]) -> tuple[str, bool]:
    """Add ``value`` to a JSON ``list[str]`` column unless already there.

    ``existing_json`` is the column's current text. Anything that does
    not decode to a JSON list (empty text, malformed JSON, a JSON
    object or scalar) is treated as an empty list.

    Returns ``(json_text, changed)``. When ``value`` is None or already
    present, the original text is handed back untouched with
    ``changed=False``; otherwise the re-encoded list with ``value``
    appended and ``changed=True``.
    """
    if value is None:
        return existing_json, False

    members: list = []
    if existing_json:
        try:
            decoded = json.loads(existing_json)
        except (json.JSONDecodeError, TypeError):
            decoded = None
        if isinstance(decoded, list):
            members = decoded

    if value in members:
        return existing_json, False
    return json.dumps([*members, value], ensure_ascii=True), True
|
||||
|
||||
async def upsert_credential_reuse(
    self,
    *,
    secret_sha256: str,
    secret_kind: str,
    principal: Optional[str],
    attacker_uuid: Optional[str],
    attacker_ip: str,
    decky: str,
    service: str,
    attempt_count: int,
    ts: Optional[datetime] = None,
) -> Optional[dict[str, Any]]:
    """Upsert a credential-reuse finding.

    The row is keyed by ``(secret_sha256, secret_kind, principal_key)``
    — ``principal_key`` is the canonicalised non-null form ("" when
    principal is null) so the unique constraint behaves the same on
    SQLite and MySQL.

    On first sight of the key a new row is inserted with
    ``target_count=1``. Otherwise the observation is merged into the
    existing row: the attacker/decky/service lists grow by any unseen
    value, ``attempt_count`` is added to the running total,
    ``last_seen``/``updated_at`` move to ``ts`` (or now, UTC), and
    ``target_count`` is recounted from the ``credentials`` table.

    Returns the row dict augmented with ``inserted: bool`` and
    ``changed: bool`` so the correlator can decide whether to publish
    a bus event. ``changed`` is True on insert, and on update whenever
    one of the lists grew or ``target_count`` moved. The list columns
    in the returned dict are still JSON-encoded strings. This
    implementation never returns None.
    """
    principal_key = principal or ""
    now = ts or datetime.now(timezone.utc)
    async with self._session() as session:
        existing = (await session.execute(
            select(CredentialReuse).where(
                CredentialReuse.secret_sha256 == secret_sha256,
                CredentialReuse.secret_kind == secret_kind,
                CredentialReuse.principal_key == principal_key,
            )
        )).scalar_one_or_none()

        if existing is None:
            row = CredentialReuse(
                id=str(uuid.uuid4()),
                secret_sha256=secret_sha256,
                secret_kind=secret_kind,
                principal=principal,
                principal_key=principal_key,
                # The attacker UUID may not be known yet (the profiler
                # backfills it), so start with an empty list then.
                attacker_uuids=json.dumps(
                    [attacker_uuid] if attacker_uuid else [], ensure_ascii=True
                ),
                attacker_ips=json.dumps([attacker_ip], ensure_ascii=True),
                deckies=json.dumps([decky], ensure_ascii=True),
                services=json.dumps([service], ensure_ascii=True),
                target_count=1,
                attempt_count=int(attempt_count),
                confidence=1.0,
                first_seen=now,
                last_seen=now,
                updated_at=now,
            )
            session.add(row)
            await session.commit()
            await session.refresh(row)
            d = row.model_dump(mode="json")
            d["inserted"] = True
            d["changed"] = True
            return d

        new_uuids, c1 = self._merge_unique(existing.attacker_uuids, attacker_uuid)
        new_ips, c2 = self._merge_unique(existing.attacker_ips, attacker_ip)
        new_deckies, c3 = self._merge_unique(existing.deckies, decky)
        new_services, c4 = self._merge_unique(existing.services, service)
        # _merge_unique hands back the stored text when nothing was
        # added, so unconditional assignment is a no-op in that case.
        existing.attacker_uuids = new_uuids
        existing.attacker_ips = new_ips
        existing.deckies = new_deckies
        existing.services = new_services

        # Recount target tuples from the underlying credentials
        # table — a (decky, service) tuple only counts when both
        # were observed together, which the JSON lists alone
        # can't tell us. This runs on every update: a new pairing
        # of an already-known decky with an already-known service
        # grows neither list, so gating the recount on list growth
        # would leave target_count stale.
        principal_filter = (
            Credential.principal == principal
            if principal is not None
            else Credential.principal.is_(None)
        )
        stmt = (
            select(func.count(func.distinct(
                Credential.decky_name + ":" + Credential.service
            )))
            .where(
                Credential.secret_sha256 == secret_sha256,
                Credential.secret_kind == secret_kind,
                principal_filter,
            )
        )
        target_count = int((await session.execute(stmt)).scalar() or 0)
        target_moved = target_count != (existing.target_count or 0)
        existing.target_count = target_count

        existing.attempt_count = (existing.attempt_count or 0) + int(attempt_count)
        existing.last_seen = now
        existing.updated_at = now
        changed = bool(c1 or c2 or c3 or c4 or target_moved)
        session.add(existing)
        await session.commit()
        await session.refresh(existing)
        d = existing.model_dump(mode="json")
        d["inserted"] = False
        d["changed"] = changed
        return d
|
||||
|
||||
async def list_credential_reuses(
    self,
    limit: int = 50,
    offset: int = 0,
    min_target_count: int = 2,
    secret_kind: Optional[str] = None,
) -> tuple[int, List[dict[str, Any]]]:
    """Return one page of credential-reuse findings plus the match count.

    Findings need ``target_count >= min_target_count`` and, when
    ``secret_kind`` is truthy, that exact ``secret_kind``. The page is
    ordered by ``target_count`` descending, then ``last_seen``
    descending, and sliced with ``offset`` / ``limit``.

    Returns ``(total, rows)`` where ``total`` counts every matching
    finding and each row is a dict whose four list columns have been
    decoded from JSON (an undecodable column becomes ``[]``).
    """
    json_columns = ("attacker_uuids", "attacker_ips", "deckies", "services")

    matching = select(CredentialReuse).where(
        CredentialReuse.target_count >= min_target_count
    )
    if secret_kind:
        matching = matching.where(CredentialReuse.secret_kind == secret_kind)

    # Count over the filtered set before ordering and paging are applied.
    count_query = select(func.count()).select_from(matching.subquery())
    page_query = (
        matching
        .order_by(
            desc(CredentialReuse.target_count),
            desc(CredentialReuse.last_seen),
        )
        .offset(offset)
        .limit(limit)
    )

    async with self._session() as session:
        total = (await session.execute(count_query)).scalar() or 0
        findings = (await session.execute(page_query)).scalars().all()

        page: List[dict[str, Any]] = []
        for finding in findings:
            record = finding.model_dump(mode="json")
            for column in json_columns:
                try:
                    record[column] = json.loads(record[column])
                except (json.JSONDecodeError, TypeError):
                    record[column] = []
            page.append(record)
        return int(total), page
|
||||
|
||||
async def get_credential_reuse_by_id(
    self, reuse_id: str
) -> Optional[dict[str, Any]]:
    """Fetch a single credential-reuse finding by its primary key.

    Returns None when no row has ``reuse_id``. Otherwise returns the
    row as a dict with its four list columns decoded from JSON; a
    column that fails to decode is replaced by ``[]``.
    """
    lookup = select(CredentialReuse).where(CredentialReuse.id == reuse_id)
    async with self._session() as session:
        finding = (await session.execute(lookup)).scalar_one_or_none()
        if finding is None:
            return None
        record = finding.model_dump(mode="json")

    for column in ("attacker_uuids", "attacker_ips", "deckies", "services"):
        try:
            record[column] = json.loads(record[column])
        except (json.JSONDecodeError, TypeError):
            record[column] = []
    return record
|
||||
|
||||
async def get_state(self, key: str) -> Optional[dict[str, Any]]:
|
||||
async with self._session() as session:
|
||||
statement = select(State).where(State.key == key)
|
||||
|
||||
Reference in New Issue
Block a user