feat(db): Campaign SQLModel + repo write/read methods

Adds the campaigns table and the BaseRepository / SQLModelRepository
methods that the campaign-clusterer worker (next commit) needs to
populate it. Mirrors the AttackerIdentity layer: schema_version from
day one for federation gossip, soft-merge via merged_into_uuid with a
chain-walking get_campaign_by_uuid, list_campaigns excluding merged-
out rows while list_all_campaigns returns the unfiltered set for the
revoke pass. attacker_identities.campaign_id gets a real FK now that
the target table exists.
This commit is contained in:
2026-04-26 08:54:28 -04:00
parent 059d1dba75
commit 0a1cf65ddb
7 changed files with 524 additions and 3 deletions

View File

@@ -71,6 +71,17 @@ class DummyRepo(BaseRepository):
async def set_attacker_identity_id(self, a, i): await super().set_attacker_identity_id(a, i)
async def list_all_identities(self): await super().list_all_identities(); return []
async def update_identity_merged_into(self, u, w): await super().update_identity_merged_into(u, w)
# Campaign clustering (this PR)
async def get_campaign_by_uuid(self, u):
    """Await the parent class's get_campaign_by_uuid; returns None implicitly."""
    await super().get_campaign_by_uuid(u)
async def list_campaigns(self, limit=50, offset=0):
    """Await the parent class's list_campaigns, then report no campaigns."""
    await super().list_campaigns(limit, offset)
    return []
async def count_campaigns(self):
    """Await the parent class's count_campaigns, then report a count of zero."""
    await super().count_campaigns()
    return 0
async def list_identities_for_campaign(self, u, limit=50, offset=0):
    """Await the parent class's list_identities_for_campaign, then return an empty list."""
    await super().list_identities_for_campaign(u, limit, offset)
    return []
async def count_identities_for_campaign(self, u):
    """Await the parent class's count_identities_for_campaign, then return zero."""
    await super().count_identities_for_campaign(u)
    return 0
async def list_identities_for_clustering(self, limit=None):
    """Await the parent class's list_identities_for_clustering, then return an empty list."""
    await super().list_identities_for_clustering(limit)
    return []
async def create_campaign(self, row):
    """Await the parent class's create_campaign, then return an empty string."""
    await super().create_campaign(row)
    return ""
async def set_identity_campaign_id(self, i, c):
    """Await the parent class's set_identity_campaign_id; returns None implicitly."""
    await super().set_identity_campaign_id(i, c)
async def list_all_campaigns(self):
    """Await the parent class's list_all_campaigns, then return an empty list."""
    await super().list_all_campaigns()
    return []
async def update_campaign_merged_into(self, u, w):
    """Await the parent class's update_campaign_merged_into; returns None implicitly."""
    await super().update_campaign_merged_into(u, w)
@pytest.mark.asyncio
async def test_base_repo_coverage():
@@ -144,6 +155,18 @@ async def test_base_repo_coverage():
await dr.list_all_identities()
await dr.update_identity_merged_into("a", "b")
await dr.update_identity_merged_into("a", None)
await dr.get_campaign_by_uuid("a")
await dr.list_campaigns()
await dr.count_campaigns()
await dr.list_identities_for_campaign("a")
await dr.count_identities_for_campaign("a")
await dr.list_identities_for_clustering()
await dr.create_campaign({"uuid": "c"})
await dr.set_identity_campaign_id("i", "c")
await dr.set_identity_campaign_id("i", None)
await dr.list_all_campaigns()
await dr.update_campaign_merged_into("c", "d")
await dr.update_campaign_merged_into("c", None)
# Swarm methods: default NotImplementedError on BaseRepository. Covering
# them here keeps the coverage contract honest for the swarm CRUD surface.

View File

@@ -0,0 +1,145 @@
"""Tests for the Campaign clustering repo methods on SQLModelRepository."""
from __future__ import annotations
from datetime import datetime, timezone
import pytest
from decnet.web.db.factory import get_repository
@pytest.fixture
async def repo(tmp_path):
    """Provide an initialized repository backed by a database file under tmp_path."""
    db_file = tmp_path / "campaigns.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
async def _create_identity(repo, uuid: str, **kwargs) -> str:
    """Create an attacker identity row through ``repo`` and return the repo's result.

    Both timestamps default to the current UTC time; the four fingerprint
    fields default to ``None``. Any of them can be overridden via ``kwargs``.
    """
    default_timestamp = datetime.now(timezone.utc)

    payload = {"uuid": uuid}
    for timestamp_field in ("first_seen_at", "last_seen_at"):
        payload[timestamp_field] = kwargs.get(timestamp_field, default_timestamp)
    for fingerprint_field in (
        "ja3_hashes",
        "hassh_hashes",
        "payload_simhashes",
        "c2_endpoints",
    ):
        payload[fingerprint_field] = kwargs.get(fingerprint_field)

    return await repo.create_attacker_identity(payload)
@pytest.mark.asyncio
async def test_create_and_get_campaign(repo):
    """A created campaign can be fetched by UUID and starts out unmerged."""
    await repo.create_campaign({"uuid": "c1", "confidence": 0.8})

    fetched = await repo.get_campaign_by_uuid("c1")

    assert fetched is not None
    assert (fetched["uuid"], fetched["confidence"]) == ("c1", 0.8)
    assert fetched["merged_into_uuid"] is None
@pytest.mark.asyncio
async def test_get_campaign_follows_merge_chain(repo):
    """Looking up a merged-out campaign resolves to the surviving campaign.

    Exercises a direct merge (c2 -> c1) and a two-hop chain
    (c3 -> c2 -> c1), so the lookup is shown to keep walking rather than
    stopping after the first hop.
    """
    for campaign_uuid in ("c1", "c2", "c3"):
        await repo.create_campaign({"uuid": campaign_uuid})
    await repo.update_campaign_merged_into("c2", "c1")
    await repo.update_campaign_merged_into("c3", "c2")

    # Single hop: querying the loser returns the winner.
    direct = await repo.get_campaign_by_uuid("c2")
    # Guard first so a missing row fails as an assertion, not a TypeError.
    assert direct is not None
    assert direct["uuid"] == "c1"

    # Two hops: c3 points at c2, which itself points at c1.
    chained = await repo.get_campaign_by_uuid("c3")
    assert chained is not None
    assert chained["uuid"] == "c1"
@pytest.mark.asyncio
async def test_list_and_count_excludes_merged_out(repo):
    """list_campaigns and count_campaigns both skip a merged-out campaign."""
    for campaign_uuid in ("c1", "c2"):
        await repo.create_campaign({"uuid": campaign_uuid})
    await repo.update_campaign_merged_into("c2", "c1")

    visible = await repo.list_campaigns()
    visible_uuids = {campaign["uuid"] for campaign in visible}
    assert visible_uuids == {"c1"}

    total = await repo.count_campaigns()
    assert total == 1
@pytest.mark.asyncio
async def test_list_all_campaigns_includes_merged_out(repo):
    """list_all_campaigns returns merged-out campaigns alongside the winner."""
    for campaign_uuid in ("c1", "c2"):
        await repo.create_campaign({"uuid": campaign_uuid})
    await repo.update_campaign_merged_into("c2", "c1")

    every_campaign = await repo.list_all_campaigns()
    seen_uuids = {campaign["uuid"] for campaign in every_campaign}
    assert seen_uuids == {"c1", "c2"}
@pytest.mark.asyncio
async def test_get_unknown_campaign_returns_none(repo):
    """Fetching a UUID that was never created yields None."""
    missing = await repo.get_campaign_by_uuid("nope")
    assert missing is None
@pytest.mark.asyncio
async def test_update_campaign_merged_into_can_revoke(repo):
    """Passing None as the merge target undoes an earlier merge."""
    await repo.create_campaign({"uuid": "c1"})
    await repo.create_campaign({"uuid": "c2"})
    await repo.update_campaign_merged_into("c2", "c1")

    # Precondition: confirm the merge actually took effect. Without this the
    # post-revoke assertions would also pass if the update were a no-op.
    merged = await repo.get_campaign_by_uuid("c2")
    assert merged is not None
    assert merged["uuid"] == "c1"

    # Revoke
    await repo.update_campaign_merged_into("c2", None)

    row = await repo.get_campaign_by_uuid("c2")
    # Guard first so a missing row fails as an assertion, not a TypeError.
    assert row is not None
    assert row["uuid"] == "c2"
    assert row["merged_into_uuid"] is None
@pytest.mark.asyncio
async def test_set_identity_campaign_id_links_and_unlinks(repo):
    """An identity can be attached to a campaign and detached again with None."""
    await repo.create_campaign({"uuid": "c1"})
    await _create_identity(repo, "i1")

    # Link the identity and check both the listing and the count.
    await repo.set_identity_campaign_id("i1", "c1")
    members = await repo.list_identities_for_campaign("c1")
    member_uuids = {identity["uuid"] for identity in members}
    assert member_uuids == {"i1"}
    linked_count = await repo.count_identities_for_campaign("c1")
    assert linked_count == 1

    # Unlink and check the campaign no longer counts the identity.
    await repo.set_identity_campaign_id("i1", None)
    unlinked_count = await repo.count_identities_for_campaign("c1")
    assert unlinked_count == 0
@pytest.mark.asyncio
async def test_list_identities_for_clustering_projects_expected_fields(repo):
    """The clustering projection carries the fingerprint and linkage fields."""
    fingerprints = {
        "ja3_hashes": '["ja3-a"]',
        "hassh_hashes": '["hassh-a"]',
        "payload_simhashes": '["dead"]',
        "c2_endpoints": '["1.2.3.4:443"]',
    }
    await _create_identity(repo, "i1", **fingerprints)

    rows = await repo.list_identities_for_clustering()
    assert len(rows) == 1

    projected = rows[0]
    assert projected["uuid"] == "i1"
    # Fingerprint columns come back exactly as they were stored.
    for column, stored_value in fingerprints.items():
        assert projected[column] == stored_value
    # A new identity has no campaign and has not been merged away.
    for column in ("campaign_id", "merged_into_uuid"):
        assert projected[column] is None
    assert projected["first_seen_at"] is not None
@pytest.mark.asyncio
async def test_list_identities_for_clustering_respects_limit(repo):
    """An explicit limit caps the result; the default returns every identity."""
    for n in range(3):
        identity_uuid = f"i{n}"
        await _create_identity(repo, identity_uuid)

    capped = await repo.list_identities_for_clustering(limit=2)
    assert len(capped) == 2

    uncapped = await repo.list_identities_for_clustering()
    assert len(uncapped) == 3
@pytest.mark.asyncio
async def test_list_identities_for_campaign_paginates(repo):
    """limit/offset split three linked identities into pages of two and one."""
    await repo.create_campaign({"uuid": "c1"})
    for n in range(3):
        identity_uuid = f"i{n}"
        await _create_identity(repo, identity_uuid)
        await repo.set_identity_campaign_id(identity_uuid, "c1")

    first_page = await repo.list_identities_for_campaign("c1", limit=2, offset=0)
    assert len(first_page) == 2

    second_page = await repo.list_identities_for_campaign("c1", limit=2, offset=2)
    assert len(second_page) == 1