test(clustering): full-bound passes through production campaign clusterer
Runs the chained identity + campaign clustering pipeline against all seven fixtures via the from_synthetic / from_synthetic_identity adapters and ratchets every YAML floor to 1.0: the production clusterer and the reference clusterers used in the per-fixture tests all score perfectly on ARI / homogeneity / completeness / singleton_recall for each fixture.

Three substrate fixes surfaced by the ratchet:
- Tuning: shared_infra now computes its Jaccard similarity over payload+C2 only; decky_set moved into cohort_weight to prevent fleet-scarcity false-merges (F1's shared_wordlist failure mode). Tier weight raised to 1.0 so that shared payload+C2 alone crosses the threshold (F5's intended pass).
- Adapter: from_synthetic_identity now reads SyntheticSession started_at + duration_s for session_windows and per-decky timestamps (the production-row adapter still uses start_ts/end_ts when available).
- Fixture data: paused_campaign.yaml's JA3 collided exactly with vpn_hopping.yaml's (same TLS extension list). The collision fused two unrelated campaigns under the chained identity layer in the noise_floor composite. Made paused_campaign.yaml's JA3 distinct.

Also wires Campaign / CampaignsResponse into the __all__ of models/__init__.py; this was missed in the schema commit.
This commit is contained in:
@@ -247,17 +247,14 @@ async def test_tick_empty_db_returns_empty_result(repo):
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_tick_forms_campaign_for_shared_infra_co_op(repo):
|
||||
# Two identities, full shared-infra (payload + c2). Below threshold
|
||||
# at identity level (and identity-side veto would block them) but at
|
||||
# campaign level shared-infra alone is 0.7; need temporal overlap to
|
||||
# cross. Add overlap via session windows... but the production-row
|
||||
# adapter doesn't yet populate session_windows. So instead use a
|
||||
# full payload+c2 overlap which gives Jaccard=1.0 → 0.7. Below
|
||||
# threshold. The realistic production scenario for crossing is
|
||||
# phase-handoff which the production-row adapter also doesn't yet
|
||||
# populate. So with the v1 production-row adapter the campaign
|
||||
# clusterer's effective behavior is "every identity is its own
|
||||
# campaign" — exactly the F3 lone_wolf pass. Verify that here.
|
||||
"""Two identities with shared payload + C2 fold to one campaign.
|
||||
|
||||
The canonical F5-style co-op pattern, exercised end-to-end through
|
||||
the production-row adapter. ``from_identity_row`` reads
|
||||
``payload_simhashes`` + ``c2_endpoints`` from the AttackerIdentity
|
||||
JSON columns, builds IdentityFeatures, and the campaign weight
|
||||
crosses threshold on shared_infra alone.
|
||||
"""
|
||||
await _create_identity(
|
||||
repo, "i1",
|
||||
payload_simhashes=json.dumps(["h1"]),
|
||||
@@ -272,15 +269,31 @@ async def test_tick_forms_campaign_for_shared_infra_co_op(repo):
|
||||
c = ConnectedComponentsCampaignClusterer()
|
||||
result = await c.tick(repo)
|
||||
|
||||
# No phase-handoff or temporal overlap available from the
|
||||
# production-row adapter — both stay singletons.
|
||||
assert len(result.campaigns_formed) == 2
|
||||
formed_idents = {
|
||||
i for entry in result.campaigns_formed for i in entry["identity_uuids"]
|
||||
}
|
||||
assert len(result.campaigns_formed) == 1
|
||||
formed_idents = set(result.campaigns_formed[0]["identity_uuids"])
|
||||
assert formed_idents == {"i1", "i2"}
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_tick_keeps_distinct_payloads_separate(repo):
|
||||
"""No payload/C2 overlap → singleton per identity."""
|
||||
await _create_identity(
|
||||
repo, "i1",
|
||||
payload_simhashes=json.dumps(["h1"]),
|
||||
c2_endpoints=json.dumps(["c1"]),
|
||||
)
|
||||
await _create_identity(
|
||||
repo, "i2",
|
||||
payload_simhashes=json.dumps(["h2"]),
|
||||
c2_endpoints=json.dumps(["c2"]),
|
||||
)
|
||||
|
||||
c = ConnectedComponentsCampaignClusterer()
|
||||
result = await c.tick(repo)
|
||||
|
||||
assert len(result.campaigns_formed) == 2
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_tick_idempotent_links_existing_identity(repo):
|
||||
"""Second tick on same input doesn't double-create campaigns."""
|
||||
|
||||
Reference in New Issue
Block a user