feat(db): Campaign SQLModel + repo write/read methods

Adds the campaigns table and the BaseRepository / SQLModelRepository methods that the campaign-clusterer worker (next commit) needs to populate it. Mirrors the AttackerIdentity layer: schema_version from day one for federation gossip, soft-merge via merged_into_uuid with a chain-walking get_campaign_by_uuid, list_campaigns excluding merged-out rows while list_all_campaigns returns the unfiltered set for the revoke pass. attacker_identities.campaign_id gets a real FK now that the target table exists.
2026-04-26 08:54:28 -04:00
parent 059d1dba75
commit 0a1cf65ddb
7 changed files with 524 additions and 3 deletions
--- a/tests/db/test_base_repo.py
+++ b/tests/db/test_base_repo.py
@@ -71,6 +71,17 @@ class DummyRepo(BaseRepository):
    async def set_attacker_identity_id(self, a, i): await super().set_attacker_identity_id(a, i)
    async def list_all_identities(self): await super().list_all_identities(); return []
    async def update_identity_merged_into(self, u, w): await super().update_identity_merged_into(u, w)
+    # Campaign clustering (this PR)
+    async def get_campaign_by_uuid(self, u): await super().get_campaign_by_uuid(u)
+    async def list_campaigns(self, limit=50, offset=0): await super().list_campaigns(limit, offset); return []
+    async def count_campaigns(self): await super().count_campaigns(); return 0
+    async def list_identities_for_campaign(self, u, limit=50, offset=0): await super().list_identities_for_campaign(u, limit, offset); return []
+    async def count_identities_for_campaign(self, u): await super().count_identities_for_campaign(u); return 0
+    async def list_identities_for_clustering(self, limit=None): await super().list_identities_for_clustering(limit); return []
+    async def create_campaign(self, row): await super().create_campaign(row); return ""
+    async def set_identity_campaign_id(self, i, c): await super().set_identity_campaign_id(i, c)
+    async def list_all_campaigns(self): await super().list_all_campaigns(); return []
+    async def update_campaign_merged_into(self, u, w): await super().update_campaign_merged_into(u, w)

@pytest.mark.asyncio
 async def test_base_repo_coverage():
@@ -144,6 +155,18 @@ async def test_base_repo_coverage():
    await dr.list_all_identities()
    await dr.update_identity_merged_into("a", "b")
    await dr.update_identity_merged_into("a", None)
+    await dr.get_campaign_by_uuid("a")
+    await dr.list_campaigns()
+    await dr.count_campaigns()
+    await dr.list_identities_for_campaign("a")
+    await dr.count_identities_for_campaign("a")
+    await dr.list_identities_for_clustering()
+    await dr.create_campaign({"uuid": "c"})
+    await dr.set_identity_campaign_id("i", "c")
+    await dr.set_identity_campaign_id("i", None)
+    await dr.list_all_campaigns()
+    await dr.update_campaign_merged_into("c", "d")
+    await dr.update_campaign_merged_into("c", None)

    # Swarm methods: default NotImplementedError on BaseRepository.  Covering
    # them here keeps the coverage contract honest for the swarm CRUD surface.
--- a/tests/db/test_campaign_repo.py
+++ b/tests/db/test_campaign_repo.py
@@ -0,0 +1,145 @@
+"""Tests for the Campaign clustering repo methods on SQLModelRepository."""
+from __future__ import annotations
+
+from datetime import datetime, timezone
+
+import pytest
+
+from decnet.web.db.factory import get_repository
+
+
+@pytest.fixture
+async def repo(tmp_path):
+    r = get_repository(db_path=str(tmp_path / "campaigns.db"))
+    await r.initialize()
+    return r
+
+
+async def _create_identity(repo, uuid: str, **kwargs) -> str:
+    now = datetime.now(timezone.utc)
+    return await repo.create_attacker_identity({
+        "uuid": uuid,
+        "first_seen_at": kwargs.get("first_seen_at", now),
+        "last_seen_at": kwargs.get("last_seen_at", now),
+        "ja3_hashes": kwargs.get("ja3_hashes"),
+        "hassh_hashes": kwargs.get("hassh_hashes"),
+        "payload_simhashes": kwargs.get("payload_simhashes"),
+        "c2_endpoints": kwargs.get("c2_endpoints"),
+    })
+
+
+@pytest.mark.asyncio
+async def test_create_and_get_campaign(repo):
+    await repo.create_campaign({"uuid": "c1", "confidence": 0.8})
+    row = await repo.get_campaign_by_uuid("c1")
+    assert row is not None
+    assert row["uuid"] == "c1"
+    assert row["confidence"] == 0.8
+    assert row["merged_into_uuid"] is None
+
+
+@pytest.mark.asyncio
+async def test_get_campaign_follows_merge_chain(repo):
+    await repo.create_campaign({"uuid": "c1"})
+    await repo.create_campaign({"uuid": "c2"})
+    await repo.update_campaign_merged_into("c2", "c1")
+
+    # Querying the loser returns the winner.
+    row = await repo.get_campaign_by_uuid("c2")
+    assert row["uuid"] == "c1"
+
+
+@pytest.mark.asyncio
+async def test_list_and_count_excludes_merged_out(repo):
+    await repo.create_campaign({"uuid": "c1"})
+    await repo.create_campaign({"uuid": "c2"})
+    await repo.update_campaign_merged_into("c2", "c1")
+
+    listed = await repo.list_campaigns()
+    assert {c["uuid"] for c in listed} == {"c1"}
+    assert await repo.count_campaigns() == 1
+
+
+@pytest.mark.asyncio
+async def test_list_all_campaigns_includes_merged_out(repo):
+    await repo.create_campaign({"uuid": "c1"})
+    await repo.create_campaign({"uuid": "c2"})
+    await repo.update_campaign_merged_into("c2", "c1")
+
+    all_campaigns = await repo.list_all_campaigns()
+    assert {c["uuid"] for c in all_campaigns} == {"c1", "c2"}
+
+
+@pytest.mark.asyncio
+async def test_get_unknown_campaign_returns_none(repo):
+    assert await repo.get_campaign_by_uuid("nope") is None
+
+
+@pytest.mark.asyncio
+async def test_update_campaign_merged_into_can_revoke(repo):
+    await repo.create_campaign({"uuid": "c1"})
+    await repo.create_campaign({"uuid": "c2"})
+    await repo.update_campaign_merged_into("c2", "c1")
+    # Revoke
+    await repo.update_campaign_merged_into("c2", None)
+
+    row = await repo.get_campaign_by_uuid("c2")
+    assert row["uuid"] == "c2"
+    assert row["merged_into_uuid"] is None
+
+
+@pytest.mark.asyncio
+async def test_set_identity_campaign_id_links_and_unlinks(repo):
+    await repo.create_campaign({"uuid": "c1"})
+    await _create_identity(repo, "i1")
+
+    await repo.set_identity_campaign_id("i1", "c1")
+    linked = await repo.list_identities_for_campaign("c1")
+    assert {i["uuid"] for i in linked} == {"i1"}
+    assert await repo.count_identities_for_campaign("c1") == 1
+
+    await repo.set_identity_campaign_id("i1", None)
+    assert await repo.count_identities_for_campaign("c1") == 0
+
+
+@pytest.mark.asyncio
+async def test_list_identities_for_clustering_projects_expected_fields(repo):
+    await _create_identity(
+        repo, "i1",
+        ja3_hashes='["ja3-a"]',
+        hassh_hashes='["hassh-a"]',
+        payload_simhashes='["dead"]',
+        c2_endpoints='["1.2.3.4:443"]',
+    )
+    rows = await repo.list_identities_for_clustering()
+    assert len(rows) == 1
+    row = rows[0]
+    assert row["uuid"] == "i1"
+    assert row["ja3_hashes"] == '["ja3-a"]'
+    assert row["hassh_hashes"] == '["hassh-a"]'
+    assert row["payload_simhashes"] == '["dead"]'
+    assert row["c2_endpoints"] == '["1.2.3.4:443"]'
+    assert row["campaign_id"] is None
+    assert row["merged_into_uuid"] is None
+    assert row["first_seen_at"] is not None
+
+
+@pytest.mark.asyncio
+async def test_list_identities_for_clustering_respects_limit(repo):
+    for n in range(3):
+        await _create_identity(repo, f"i{n}")
+    assert len(await repo.list_identities_for_clustering(limit=2)) == 2
+    assert len(await repo.list_identities_for_clustering()) == 3
+
+
+@pytest.mark.asyncio
+async def test_list_identities_for_campaign_paginates(repo):
+    await repo.create_campaign({"uuid": "c1"})
+    for n in range(3):
+        await _create_identity(repo, f"i{n}")
+        await repo.set_identity_campaign_id(f"i{n}", "c1")
+
+    page = await repo.list_identities_for_campaign("c1", limit=2, offset=0)
+    assert len(page) == 2
+    page2 = await repo.list_identities_for_campaign("c1", limit=2, offset=2)
+    assert len(page2) == 1