feat(web): read-only /api/v1/identities/* endpoints + repo methods

Second of the five-step identity-resolution substrate. Ships the API surface against the empty AttackerIdentity table from commit 1 — every endpoint returns empty/404 cleanly until the clusterer populates rows.

Routes (auth-gated, viewer role):
* GET /api/v1/identities — paginated list, excludes merged-out rows
* GET /api/v1/identities/{uuid} — detail; transparently follows merged_into_uuid to surface the canonical winner
* GET /api/v1/identities/{uuid}/observations — Attacker rows FK'd to the (resolved) identity uuid

Repository (BaseRepository abstract + SQLModelRepository concrete):
* get_identity_by_uuid (with merge-chain following, hop-bounded)
* list_identities / count_identities (excluding merged-out)
* list_observations_for_identity / count_observations_for_identity

Tests: 12 new (empty-table behavior, seeded data, merge-chain resolution, repo-level smoke against real SQLite).

Also fixes the pre-existing test_base_repo_coverage failure (DEBT-041 added abstract methods without updating the DummyRepo stub). It is included here because this PR adds 5 more abstract methods, so fixing it comes as a bonus.

474 db/web/profiler/correlation tests green.
2026-04-26 07:08:55 -04:00
parent 84c1ca9c9b
commit dc3d08dd41
9 changed files with 591 additions and 0 deletions
--- a/decnet/web/db/repository.py
+++ b/decnet/web/db/repository.py
@@ -364,6 +364,48 @@ class BaseRepository(ABC):
        """Retrieve the total count of attacker profile records, optionally filtered."""
        pass

+    # ─── Identity resolution (Observation → Identity → Campaign) ───────────
+    # The clusterer that populates these rows is a separate downstream
+    # effort. The read-only API ships first; until the clusterer runs,
+    # every method below returns empty/None against an empty table.
+    # See development/IDENTITY_RESOLUTION.md.
+
+    @abstractmethod
+    async def get_identity_by_uuid(self, uuid: str) -> Optional[dict[str, Any]]:
+        """
+        Return the ``AttackerIdentity`` row for ``uuid`` as a dict, or ``None`` if absent.
+
+        If the row has ``merged_into_uuid`` set (i.e. the clusterer
+        soft-merged it into another identity), implementations MUST
+        follow the chain and return the winner — callers should never
+        see a merged-out row as the answer to a fresh query.
+        """
+        pass
+
+    @abstractmethod
+    async def list_identities(
+        self, limit: int = 50, offset: int = 0,
+    ) -> list[dict[str, Any]]:
+        """Paginated list of identity rows, newest-updated first. Excludes merged-out rows."""
+        pass
+
+    @abstractmethod
+    async def count_identities(self) -> int:
+        """Return the total number of identity rows, excluding merged-out rows."""
+        pass
+
+    @abstractmethod
+    async def list_observations_for_identity(
+        self, identity_uuid: str, limit: int = 50, offset: int = 0,
+    ) -> list[dict[str, Any]]:
+        """Paginated ``Attacker`` observation rows linked to ``identity_uuid``, newest first."""
+        pass
+
+    @abstractmethod
+    async def count_observations_for_identity(self, identity_uuid: str) -> int:
+        """Return the total number of ``Attacker`` rows FK'd to ``identity_uuid``."""
+        pass
+
    @abstractmethod
    async def get_attacker_commands(
        self,
--- a/decnet/web/db/sqlmodel_repo.py
+++ b/decnet/web/db/sqlmodel_repo.py
@@ -36,6 +36,7 @@ from decnet.web.db.models import (
    State,
    Attacker,
    AttackerBehavior,
+    AttackerIdentity,
    AttackerIntel,
    SessionProfile,
    SmtpTarget,
@@ -1390,6 +1391,83 @@ class SQLModelRepository(BaseRepository):
            result = await session.execute(statement)
            return result.scalar() or 0

+    # ─── Identity resolution reads ────────────────────────────────────────
+
+    async def get_identity_by_uuid(self, uuid: str) -> Optional[dict[str, Any]]:
+        # Walk merged_into_uuid links until reaching a row with no merge
+        # target (the winner). At most max_fetches rows are read, so a
+        # corrupted merge ring cannot keep this loop running; the
+        # clusterer should never create one, the cap is purely defensive.
+        max_fetches = 8
+        lookup_uuid = uuid
+        async with self._session() as session:
+            for _ in range(max_fetches):
+                query = select(AttackerIdentity).where(
+                    AttackerIdentity.uuid == lookup_uuid
+                )
+                row = (await session.execute(query)).scalar_one_or_none()
+                if row is None:
+                    return None
+                if row.merged_into_uuid is None:
+                    return row.model_dump(mode="json")
+                lookup_uuid = row.merged_into_uuid
+            # Cap reached: hand back the last row read (still merged-out).
+            return row.model_dump(mode="json")
+
+    async def list_identities(
+        self, limit: int = 50, offset: int = 0,
+    ) -> list[dict[str, Any]]:
+        # Merged-out rows are excluded so the list view is the de-duped truth.
+        # uuid is a secondary sort key: rows sharing an updated_at otherwise
+        # have no fixed order, so offset/limit pages could repeat or skip them.
+        statement = (
+            select(AttackerIdentity)
+            .where(AttackerIdentity.merged_into_uuid.is_(None))
+            .order_by(desc(AttackerIdentity.updated_at), AttackerIdentity.uuid)
+            .offset(offset)
+            .limit(limit)
+        )
+        async with self._session() as session:
+            result = await session.execute(statement)
+            return [row.model_dump(mode="json") for row in result.scalars().all()]
+
+    async def count_identities(self) -> int:
+        # Count only rows with no merge target, i.e. merged-out identities
+        # are left out — the same filter list_identities applies.
+        not_merged = AttackerIdentity.merged_into_uuid.is_(None)
+        count_query = select(func.count()).select_from(AttackerIdentity).where(not_merged)
+        async with self._session() as session:
+            total = (await session.execute(count_query)).scalar()
+            # Report 0 if scalar() comes back empty.
+            return total or 0
+
+    async def list_observations_for_identity(
+        self, identity_uuid: str, limit: int = 50, offset: int = 0,
+    ) -> list[dict[str, Any]]:
+        # One page of Attacker rows whose identity_id equals identity_uuid,
+        # latest last_seen first. The uuid is compared as given — no merge
+        # chain is followed here. Each row is dumped in JSON mode and then
+        # passed through _deserialize_attacker before being returned.
+        page_query = select(Attacker).where(Attacker.identity_id == identity_uuid)
+        page_query = page_query.order_by(desc(Attacker.last_seen))
+        page_query = page_query.offset(offset).limit(limit)
+        async with self._session() as session:
+            attackers = (await session.execute(page_query)).scalars().all()
+            return [
+                self._deserialize_attacker(attacker.model_dump(mode="json"))
+                for attacker in attackers
+            ]
+
+    async def count_observations_for_identity(self, identity_uuid: str) -> int:
+        # Number of Attacker rows whose identity_id equals identity_uuid;
+        # the uuid is compared as given, with no merge-chain resolution.
+        linked = Attacker.identity_id == identity_uuid
+        count_query = select(func.count()).select_from(Attacker).where(linked)
+        async with self._session() as session:
+            total = (await session.execute(count_query)).scalar()
+            # Report 0 if scalar() comes back empty.
+            return total or 0
+
    async def get_attacker_commands(
        self,
        uuid: str,