Files
DECNET/decnet/web/db/repository.py
anti 3eb67c9400 refactor(intel): re-key attacker_intel on attacker_uuid (closes DEBT-041)
The threat-intel surface was IP-keyed on day one as an expedient — the
worker is woken by IP-bearing bus events. ANTI's call: don't carry that
debt. NO IPs as primary keys anywhere on the attacker-intel surface.

Schema:
- attacker_uuid is now the canonical key — UNIQUE + FK to attackers.uuid.
- attacker_ip stays as a denormalised, indexed, NON-UNIQUE value column.
  Updated on every upsert; useful for SIEM payloads and audit lookups,
  but explicitly NOT a key. Model docstring says so.
- Pre-v1, no Alembic migration needed. SQLModel.metadata.create_all()
  builds the new shape on fresh DBs.

Repo:
- upsert_attacker_intel now keys on attacker_uuid.
- get_attacker_intel_by_ip → get_attacker_intel_by_uuid.
- get_unenriched_attacker_ips → get_unenriched_attackers, returning
  [{uuid, ip}] tuples so the worker writes by UUID and dispatches
  provider calls by IP without a second round-trip.

Worker:
- _enrich_one(uuid, ip, ...) — UUID lands on the row, IP rides for
  provider egress.
- attacker.intel.enriched bus payload gains attacker_uuid alongside
  attacker_ip — webhook → SIEM consumers benefit; no removal.

API:
- GET /api/v1/attackers/{ip}/intel deleted outright (rip-and-replace,
  never deployed beyond dev).
- GET /api/v1/attackers/{uuid}/intel is the only public route, matching
  every other /attackers/* route.

Frontend:
- <IntelPanel uuid={id!} /> uses the URL param directly, fetches in
  parallel with the rest of AttackerDetail rather than waiting on
  attacker.ip.

Tests: re-keyed in place, 39 passed (same coverage as before the
refactor). Provider-impl tests untouched.

DEBT-041: closed in DEBT.md (entry preserved as historical rationale,
summary table flipped to , remaining-open list shortened by one).
2026-04-26 05:35:29 -04:00

643 lines
22 KiB
Python

from abc import ABC, abstractmethod
from typing import Any, Optional
class BaseRepository(ABC):
"""Abstract base class for DECNET web dashboard data storage."""
@abstractmethod
async def initialize(self) -> None:
"""Initialize the database schema."""
pass
@abstractmethod
async def add_log(self, log_data: dict[str, Any]) -> None:
"""Add a new log entry to the database."""
pass
async def add_logs(self, log_entries: list[dict[str, Any]]) -> None:
"""Bulk-insert log entries in a single transaction.
Default implementation falls back to per-row add_log; concrete
repositories should override for a real single-commit insert.
"""
for _entry in log_entries:
await self.add_log(_entry)
@abstractmethod
async def get_logs(
self,
limit: int = 50,
offset: int = 0,
search: Optional[str] = None
) -> list[dict[str, Any]]:
"""Retrieve paginated log entries."""
pass
@abstractmethod
async def get_total_logs(self, search: Optional[str] = None) -> int:
"""Retrieve the total count of logs, optionally filtered by search."""
pass
@abstractmethod
async def get_stats_summary(self) -> dict[str, Any]:
"""Retrieve high-level dashboard metrics."""
pass
@abstractmethod
async def get_deckies(self) -> list[dict[str, Any]]:
"""Retrieve the list of currently deployed deckies."""
pass
@abstractmethod
async def get_user_by_username(self, username: str) -> Optional[dict[str, Any]]:
"""Retrieve a user by their username."""
pass
@abstractmethod
async def get_user_by_uuid(self, uuid: str) -> Optional[dict[str, Any]]:
"""Retrieve a user by their UUID."""
pass
@abstractmethod
async def create_user(self, user_data: dict[str, Any]) -> None:
"""Create a new dashboard user."""
pass
@abstractmethod
async def update_user_password(self, uuid: str, password_hash: str, must_change_password: bool = False) -> None:
"""Update a user's password and change the must_change_password flag."""
pass
@abstractmethod
async def list_users(self) -> list[dict[str, Any]]:
"""Retrieve all users (caller must strip password_hash before returning to clients)."""
pass
@abstractmethod
async def delete_user(self, uuid: str) -> bool:
"""Delete a user by UUID. Returns True if user was found and deleted."""
pass
@abstractmethod
async def update_user_role(self, uuid: str, role: str) -> None:
"""Update a user's role."""
pass
@abstractmethod
async def purge_logs_and_bounties(self) -> dict[str, int]:
"""Delete all logs, bounties, and attacker profiles. Returns counts of deleted rows."""
pass
@abstractmethod
async def add_bounty(self, bounty_data: dict[str, Any]) -> None:
"""Add a new harvested artifact (bounty) to the database."""
pass
@abstractmethod
async def get_bounties(
self,
limit: int = 50,
offset: int = 0,
bounty_type: Optional[str] = None,
search: Optional[str] = None
) -> list[dict[str, Any]]:
"""Retrieve paginated bounty entries."""
pass
@abstractmethod
async def get_total_bounties(self, bounty_type: Optional[str] = None, search: Optional[str] = None) -> int:
"""Retrieve the total count of bounties, optionally filtered."""
pass
# ---- credentials ---------------------------------------------------
@abstractmethod
async def upsert_credential(self, data: dict[str, Any]) -> int:
"""Insert or upsert a credential attempt; returns the row id.
Dedup tuple: (attacker_ip, decky_name, service, secret_sha256,
principal_or_None). On dedup match, ``attempt_count`` is bumped
and ``last_seen`` updated; the originally-seen ``first_seen``
and ``fields`` JSON are preserved.
"""
pass
@abstractmethod
async def get_credentials(
self,
limit: int = 50,
offset: int = 0,
search: Optional[str] = None,
service: Optional[str] = None,
attacker_ip: Optional[str] = None,
) -> list[dict[str, Any]]:
"""Paginated credential rows, with optional filters."""
pass
@abstractmethod
async def get_total_credentials(
self,
search: Optional[str] = None,
service: Optional[str] = None,
attacker_ip: Optional[str] = None,
) -> int:
"""Total credential count under the same filters as get_credentials."""
pass
@abstractmethod
async def get_credentials_for_attacker(
self, attacker_ip: str
) -> list[dict[str, Any]]:
"""Every credential row from the given attacker IP."""
pass
@abstractmethod
async def get_credential_attempts_for_secret(
self, secret_sha256: str
) -> list[dict[str, Any]]:
"""Every (attacker, decky, service, principal) row sharing this secret hash."""
pass
@abstractmethod
async def upsert_credential_reuse(
self,
*,
secret_sha256: str,
secret_kind: str,
principal: Optional[str],
attacker_uuid: Optional[str],
attacker_ip: str,
decky: str,
service: str,
attempt_count: int,
ts: Optional[Any] = None,
) -> Optional[dict[str, Any]]:
"""Upsert one credential-reuse finding. Returns the row dict (with
``inserted: bool`` mixed in) on insert/update, or None if the row
is below the reuse threshold and shouldn't be persisted yet.
"""
pass
@abstractmethod
async def find_credential_reuse_candidates(
self, min_targets: int = 2
) -> list[dict[str, Any]]:
"""Group ``credentials`` by ``(secret_sha256, secret_kind, principal)``
and return groups whose distinct ``(decky_name, service)`` count is
at least *min_targets*. Each entry has the group key, the
``target_count``, and the underlying credential rows for the
correlator to fold into ``CredentialReuse``.
"""
pass
@abstractmethod
async def list_credential_reuses(
self,
limit: int = 50,
offset: int = 0,
min_target_count: int = 2,
secret_kind: Optional[str] = None,
) -> tuple[int, list[dict[str, Any]]]:
"""Paged list of credential-reuse findings ordered by target_count desc."""
pass
@abstractmethod
async def get_credential_reuse_by_id(
self, reuse_id: str
) -> Optional[dict[str, Any]]:
"""One credential-reuse finding by UUID, or None."""
pass
@abstractmethod
async def update_credential_attacker_uuid(
self, attacker_ip: str, attacker_uuid: str
) -> int:
"""Backfill ``attacker_uuid`` on every Credential row matching the IP
whose ``attacker_uuid`` is currently null. Returns rows updated.
"""
pass
@abstractmethod
async def get_state(self, key: str) -> Optional[dict[str, Any]]:
"""Retrieve a specific state entry by key."""
pass
@abstractmethod
async def set_state(self, key: str, value: Any) -> None:
"""Store a specific state entry by key."""
pass
@abstractmethod
async def get_max_log_id(self) -> int:
"""Return the highest log ID, or 0 if the table is empty."""
pass
@abstractmethod
async def get_logs_after_id(self, last_id: int, limit: int = 500) -> list[dict[str, Any]]:
"""Return logs with id > last_id, ordered by id ASC, up to limit."""
pass
@abstractmethod
async def get_all_bounties_by_ip(self) -> dict[str, list[dict[str, Any]]]:
"""Retrieve all bounty rows grouped by attacker_ip."""
pass
@abstractmethod
async def get_bounties_for_ips(self, ips: set[str]) -> dict[str, list[dict[str, Any]]]:
"""Retrieve bounty rows grouped by attacker_ip, filtered to only the given IPs."""
pass
@abstractmethod
async def upsert_attacker(self, data: dict[str, Any]) -> str:
"""Insert or replace an attacker profile record. Returns the row's UUID."""
pass
@abstractmethod
async def upsert_attacker_behavior(self, attacker_uuid: str, data: dict[str, Any]) -> None:
"""Insert or replace the behavioral/fingerprint row for an attacker."""
pass
@abstractmethod
async def get_attacker_behavior(self, attacker_uuid: str) -> Optional[dict[str, Any]]:
"""Retrieve the behavioral/fingerprint row for an attacker UUID."""
pass
@abstractmethod
async def get_behaviors_for_ips(self, ips: set[str]) -> dict[str, dict[str, Any]]:
"""Bulk-fetch behavior rows keyed by attacker IP (JOIN to attackers)."""
pass
@abstractmethod
async def upsert_session_profile(self, sid: str, data: dict[str, Any]) -> None:
"""Insert or update the keystroke-dynamics profile row for a session."""
pass
@abstractmethod
async def get_session_profile(self, sid: str) -> Optional[dict[str, Any]]:
"""Retrieve the keystroke-dynamics profile row for a session."""
pass
@abstractmethod
async def upsert_attacker_intel(self, data: dict[str, Any]) -> str:
"""Insert or update the threat-intel row for an attacker UUID.
``data`` MUST include ``attacker_uuid``, ``attacker_ip`` and
``expires_at``. Returns the row UUID. Keyed on ``attacker_uuid``
(UNIQUE + FK to ``attackers.uuid``); ``attacker_ip`` is denormalised
— it gets overwritten on every upsert if the attacker rotates IPs.
"""
pass
@abstractmethod
async def get_attacker_intel_by_uuid(self, uuid: str) -> Optional[dict[str, Any]]:
"""Return the threat-intel row for ``uuid`` or ``None`` if missing."""
pass
@abstractmethod
async def get_unenriched_attackers(
self, limit: int = 100,
) -> list[dict[str, Any]]:
"""List ``{"uuid", "ip"}`` pairs for attackers with no intel row OR
whose row is past ``expires_at``.
Used by the enrich worker to backfill on startup and on each wake.
Returns both fields so the worker can write keyed on UUID without
a second per-attacker DB round-trip to resolve the IP for outbound
provider calls.
"""
pass
@abstractmethod
async def increment_smtp_target(self, attacker_uuid: str, domain: str) -> None:
"""
Record that ``attacker_uuid`` targeted ``domain`` via SMTP.
Upserts the (attacker_uuid, domain) row: inserts with count=1 +
first_seen=now on first sight, bumps count + last_seen on every
subsequent hit. Callers must pre-normalize ``domain`` (lowercase,
local-part stripped).
"""
pass
@abstractmethod
async def list_smtp_targets(self, attacker_uuid: str) -> list[dict[str, Any]]:
"""Return SmtpTarget rows for an attacker, ordered by most-recent first."""
pass
@abstractmethod
async def get_attacker_stored_mail(self, uuid: str) -> list[Any]:
"""Return `message_stored` log rows for an attacker, newest first."""
pass
@abstractmethod
async def smtp_target_seen(self, domain: str) -> dict[str, Any]:
"""
Cross-attacker aggregate for a victim domain.
Returns ``{seen: bool, count: int, first_seen: datetime|None,
last_seen: datetime|None}``. Shaped as the federation-gossip RPC
that V2 will expose — each operator can answer "have any of your
attackers targeted this domain?" without leaking attacker identity.
"""
pass
@abstractmethod
async def get_attacker_by_uuid(self, uuid: str) -> Optional[dict[str, Any]]:
"""Retrieve a single attacker profile by UUID."""
pass
@abstractmethod
async def get_attackers(
self,
limit: int = 50,
offset: int = 0,
search: Optional[str] = None,
sort_by: str = "recent",
service: Optional[str] = None,
) -> list[dict[str, Any]]:
"""Retrieve paginated attacker profile records."""
pass
@abstractmethod
async def get_total_attackers(self, search: Optional[str] = None, service: Optional[str] = None) -> int:
"""Retrieve the total count of attacker profile records, optionally filtered."""
pass
@abstractmethod
async def get_attacker_commands(
self,
uuid: str,
limit: int = 50,
offset: int = 0,
service: Optional[str] = None,
) -> dict[str, Any]:
"""Retrieve paginated commands for an attacker, optionally filtered by service."""
pass
@abstractmethod
async def get_attacker_artifacts(self, uuid: str) -> list[dict[str, Any]]:
"""Return `file_captured` log rows for this attacker, newest first."""
pass
@abstractmethod
async def get_attacker_transcripts(self, uuid: str) -> list[dict[str, Any]]:
"""Return `session_recorded` log rows for this attacker, newest first."""
pass
async def get_attacker_service_activity(
self, attacker_uuid: str
) -> list[tuple[str, str]]:
"""Return the distinct ``(service, event_type)`` pairs observed
for one attacker, for bucketing into scanned vs. interacted
services. Default is NotImplementedError so non-SQLModel backends
must opt in; SQLModelRepository overrides with a cheap DISTINCT
query."""
raise NotImplementedError
async def get_attacker_ip_leaks(
self, attacker_uuid: str, *, limit: int = 10,
) -> list[dict[str, Any]]:
"""Return up to ``limit`` ``bounty_type='ip_leak'`` rows for the
attacker, newest first. Each row's payload carries the TCP
source IP, the header that leaked, and the claimed real IP —
see the XFF-mismatch extractor in ``decnet.web.ingester`` for
the shape. Caller pairs with :meth:`count_attacker_ip_leaks`
to detect XFF-rotation (100+ claimed IPs from one source)."""
raise NotImplementedError
async def count_attacker_ip_leaks(self, attacker_uuid: str) -> int:
"""Total number of ``ip_leak`` bounties recorded for this
attacker. Used to detect XFF-rotation signal where the attacker
cycles through many claimed IPs (WAF-bypass-list probing)."""
raise NotImplementedError
@abstractmethod
async def get_session_log(self, sid: str) -> Optional[dict[str, Any]]:
"""Look up the `session_recorded` Log row for a given session UUID."""
pass
# ------------------------------------------------------------- swarm
# Swarm methods have default no-op / empty implementations so existing
# subclasses and non-swarm deployments continue to work without change.
async def add_swarm_host(self, data: dict[str, Any]) -> None:
raise NotImplementedError
async def get_swarm_host_by_name(self, name: str) -> Optional[dict[str, Any]]:
raise NotImplementedError
async def get_swarm_host_by_uuid(self, uuid: str) -> Optional[dict[str, Any]]:
raise NotImplementedError
async def get_swarm_host_by_fingerprint(self, fingerprint: str) -> Optional[dict[str, Any]]:
raise NotImplementedError
async def list_swarm_hosts(self, status: Optional[str] = None) -> list[dict[str, Any]]:
raise NotImplementedError
async def update_swarm_host(self, uuid: str, fields: dict[str, Any]) -> None:
raise NotImplementedError
async def delete_swarm_host(self, uuid: str) -> bool:
raise NotImplementedError
async def upsert_decky_shard(self, data: dict[str, Any]) -> None:
raise NotImplementedError
async def list_decky_shards(self, host_uuid: Optional[str] = None) -> list[dict[str, Any]]:
raise NotImplementedError
async def delete_decky_shards_for_host(self, host_uuid: str) -> int:
raise NotImplementedError
async def delete_decky_shard(self, decky_name: str) -> bool:
raise NotImplementedError
# ----------------------------------------------------------- mazenet
# MazeNET topology persistence. Default no-op / NotImplementedError so
# non-default backends stay functional; SQLModelRepository provides the
# real implementation used by SQLite and MySQL.
async def create_topology(self, data: dict[str, Any]) -> str:
raise NotImplementedError
async def get_topology(self, topology_id: str) -> Optional[dict[str, Any]]:
raise NotImplementedError
async def list_topologies(
self,
status: Optional[str] = None,
limit: Optional[int] = None,
offset: Optional[int] = None,
) -> list[dict[str, Any]]:
raise NotImplementedError
async def count_topologies(self, status: Optional[str] = None) -> int:
raise NotImplementedError
async def update_topology_status(
self,
topology_id: str,
new_status: str,
reason: Optional[str] = None,
) -> None:
raise NotImplementedError
async def delete_topology_cascade(self, topology_id: str) -> bool:
raise NotImplementedError
async def set_topology_resync(self, topology_id: str, value: bool) -> None:
raise NotImplementedError
async def list_topologies_needing_resync(self) -> list[dict[str, Any]]:
raise NotImplementedError
async def add_lan(self, data: dict[str, Any]) -> str:
raise NotImplementedError
async def update_lan(
self,
lan_id: str,
fields: dict[str, Any],
*,
expected_version: Optional[int] = None,
enforce_pending: bool = False,
) -> None:
raise NotImplementedError
async def list_lans_for_topology(
self, topology_id: str
) -> list[dict[str, Any]]:
raise NotImplementedError
async def add_topology_decky(self, data: dict[str, Any]) -> str:
raise NotImplementedError
async def update_topology_decky(
self,
decky_uuid: str,
fields: dict[str, Any],
*,
expected_version: Optional[int] = None,
enforce_pending: bool = False,
) -> None:
raise NotImplementedError
async def list_topology_deckies(
self, topology_id: str
) -> list[dict[str, Any]]:
raise NotImplementedError
async def add_topology_edge(self, data: dict[str, Any]) -> str:
raise NotImplementedError
async def list_topology_edges(
self, topology_id: str
) -> list[dict[str, Any]]:
raise NotImplementedError
async def list_topology_status_events(
self, topology_id: str, limit: int = 100
) -> list[dict[str, Any]]:
raise NotImplementedError
# -------------------- pre-deploy (pending-only) mutations --------------------
async def delete_lan(
self, lan_id: str, *, expected_version: Optional[int] = None
) -> None:
raise NotImplementedError
async def delete_topology_decky(
self, decky_uuid: str, *, expected_version: Optional[int] = None
) -> None:
raise NotImplementedError
async def delete_topology_edge(
self, edge_id: str, *, expected_version: Optional[int] = None
) -> None:
raise NotImplementedError
# -------------------- live mutation queue (reconciler) --------------------
async def enqueue_topology_mutation(
self,
topology_id: str,
op: str,
payload: dict[str, Any],
*,
expected_version: Optional[int] = None,
) -> str:
raise NotImplementedError
async def claim_next_mutation(
self, topology_id: str
) -> Optional[dict[str, Any]]:
raise NotImplementedError
async def mark_mutation_applied(self, mutation_id: str) -> None:
raise NotImplementedError
async def mark_mutation_failed(
self, mutation_id: str, reason: str
) -> None:
raise NotImplementedError
async def list_topology_mutations(
self,
topology_id: str,
state: Optional[str] = None,
) -> list[dict[str, Any]]:
raise NotImplementedError
async def has_pending_topology_mutation(self) -> bool:
return False
async def list_live_topology_ids(self) -> list[str]:
return []
# --------------------------------------------------------- webhooks
# Webhook subscriptions — external SIEM / SOAR egress configuration.
# Default NotImplementedError keeps non-default backends honest; the
# SQLModel-backed SQLite and MySQL repos override everything below.
async def create_webhook_subscription(self, data: dict[str, Any]) -> None:
raise NotImplementedError
async def get_webhook_subscription(self, uuid: str) -> Optional[dict[str, Any]]:
raise NotImplementedError
async def get_webhook_subscription_by_name(
self, name: str
) -> Optional[dict[str, Any]]:
raise NotImplementedError
async def list_webhook_subscriptions(
self, enabled_only: bool = False
) -> list[dict[str, Any]]:
raise NotImplementedError
async def update_webhook_subscription(
self, uuid: str, patch: dict[str, Any]
) -> bool:
raise NotImplementedError
async def delete_webhook_subscription(self, uuid: str) -> bool:
raise NotImplementedError
async def record_webhook_success(self, uuid: str, ts: Any) -> None:
raise NotImplementedError
async def record_webhook_failure(
self, uuid: str, ts: Any, error: str
) -> int:
"""Record a failed delivery; return the new ``consecutive_failures``
count so the caller can decide whether to trip the circuit."""
raise NotImplementedError
async def trip_webhook_circuit(self, uuid: str, ts: Any) -> None:
"""Auto-disable a subscription after repeated failures. Sets
``enabled=False`` and stamps ``auto_disabled_at``."""
raise NotImplementedError