diff --git a/tests/clustering/test_noise_floor_fixture.py b/tests/clustering/test_noise_floor_fixture.py
new file mode 100644
index 00000000..86c3ab7d
--- /dev/null
+++ b/tests/clustering/test_noise_floor_fixture.py
@@ -0,0 +1,167 @@
+"""
+End-to-end pipeline test for fixture 6 (noise_floor).
+
+Composite corpus: bundles all five prior fixtures' campaigns + 10
+Delivery-only noise scanners on top of lone_wolf's 8 inherited
+ones. The fixture exists to catch cross-corpus interference —
+signal collisions, factory ID re-use, clusterer ambiguity that
+shows up only when multiple campaigns are scored together. Each
+constituent fixture already ships its own in-fixture adversarial
+test; fixture 6 covers a different failure class.
+
+The composition is declared in `noise_floor.yaml` via an
+``include_fixtures`` block (a fixture-6-specific format). The
+loader in this test file expands it into a full
+``corpus.campaigns`` spec at runtime, so the factory itself stays
+unaware of the include mechanism.
+
+Three tests cover this:
+
+1. `test_noise_floor_corpus_integrity` — every constituent
+   fixture's campaigns + actors are present in the merged corpus
+   with their truth labels intact, and the 10 extra noise scanners
+   are present alongside lone_wolf's 8 (truth-singletons all).
+
+2. `test_noise_floor_pipeline_passes_bounds` — runs
+   `composite_signals_clusterer` against the merged corpus.
+   Approximates the planned similarity graph well enough that
+   every campaign resolves and every singleton stays singleton.
+   Trips the bound floors if any cross-fixture interference creeps
+   in (signal collisions across fixtures' JA3/HASSH/C2 strings).
+
+3. `test_noise_floor_singleton_recall_holds` — explicit assertion
+   that every truth-singleton (the lone wolf, the 8 inherited and
+   10 extra noise scanners, and the 2 single-actor shared_wordlist
+   campaigns — 21 total) ends up in a singleton predicted cluster.
+   Singleton recall is the load-bearing metric for this fixture.
+"""
+from __future__ import annotations
+
+from collections import Counter
+from pathlib import Path
+from typing import Any
+
+import pytest
+import yaml
+
+from tests.clustering.fixture_harness import (
+    assert_fixture_bounds,
+    composite_signals_clusterer,
+)
+from tests.clustering.metrics import score
+from tests.factories.campaign_factory import generate, load_yaml
+
+FIXTURE_DIR = Path(__file__).parent.parent / "fixtures" / "campaigns"
+FIXTURE_YAML = FIXTURE_DIR / "noise_floor.yaml"
+EXPECTED_YAML = FIXTURE_DIR / "noise_floor.expected.yaml"
+
+
+def _expand_noise_floor_spec() -> dict[str, Any]:
+    """Expand noise_floor.yaml's ``include_fixtures`` list into one
+    corpus-shaped spec that ``generate()`` accepts as-is: campaigns
+    from every constituent fixture are concatenated and noise-scanner
+    counts are summed with ``extra_noise_scanners``."""
+    declared = yaml.safe_load(FIXTURE_YAML.read_text(encoding="utf-8"))
+    campaigns: list[dict[str, Any]] = []
+    inherited_noise = 0
+    for fname in declared["include_fixtures"]:
+        sub = load_yaml(FIXTURE_DIR / fname)
+        if "corpus" in sub:
+            campaigns.extend(sub["corpus"].get("campaigns") or [])
+            inherited_noise += int(
+                (sub["corpus"].get("noise") or {}).get("scanner_count") or 0
+            )
+        else:  # fixture declares a single top-level ``campaign`` instead
+            campaigns.append({"campaign": sub["campaign"]})
+    extra = int(declared.get("extra_noise_scanners") or 0)
+    return {
+        "corpus": {
+            "campaigns": campaigns,
+            "noise": {"scanner_count": inherited_noise + extra},
+        }
+    }
+
+
+def test_noise_floor_corpus_integrity() -> None:
+    """Merged corpus holds every campaign id, 18 noise rows, 12 real rows."""
+    spec = _expand_noise_floor_spec()
+    corpus = generate(spec, seed=0)
+
+    truth_campaigns = {a.truth_campaign_id for a in corpus.attackers}
+
+    # Every constituent fixture's campaign id appears in the merged
+    # corpus. Any missing id means the loader dropped a fixture.
+    expected_campaign_ids = {
+        "shared-wordlist-A",
+        "shared-wordlist-B",
+        "vpn-hopping-001",
+        "lone-wolf-001",
+        "paused-campaign-001",
+        "multi-operator-001",
+    }
+    assert expected_campaign_ids <= truth_campaigns, (
+        f"missing campaign ids: {expected_campaign_ids - truth_campaigns}"
+    )
+
+    # Noise scanner count: 8 inherited from lone_wolf + 10 added.
+    noise_attackers = [
+        a for a in corpus.attackers
+        if a.truth_campaign_id.startswith("noise-scanner-")
+    ]
+    assert len(noise_attackers) == 18
+
+    # Every noise scanner is its own truth-campaign (singleton).
+    assert len({a.truth_campaign_id for a in noise_attackers}) == 18
+
+    # Real-campaign attackers: 2 (shared_wordlist) + 5 (vpn_hopping) +
+    # 1 (lone_wolf wolf) + 2 (paused_campaign) + 2 (multi_operator)
+    # = 12.
+    real_attackers = [
+        a for a in corpus.attackers
+        if not a.truth_campaign_id.startswith("noise-scanner-")
+    ]
+    assert len(real_attackers) == 12, (
+        f"expected 12 campaign-driven attackers, got {len(real_attackers)}"
+    )
+
+
+def test_noise_floor_pipeline_passes_bounds() -> None:
+    """Composite clusterer meets the bounds in noise_floor.expected.yaml."""
+    spec = _expand_noise_floor_spec()
+    corpus = generate(spec, seed=0)
+    metrics = assert_fixture_bounds(corpus, composite_signals_clusterer, EXPECTED_YAML)
+    # Heterogeneous corpus: a perfect ARI is not required (the bound is
+    # loose at 0.85); still verify the harness returned sensible numbers.
+    assert metrics["adjusted_rand_index"] >= 0.85
+    assert metrics["singleton_recall"] >= 0.95
+
+
+def test_noise_floor_singleton_recall_holds() -> None:
+    """Every truth-singleton (lone wolf + 18 noise + 2 single-actor
+    shared_wordlist campaigns) must stay singleton under the composite
+    clusterer; noise absorption makes campaign attribution useless.
+    """
+    spec = _expand_noise_floor_spec()
+    corpus = generate(spec, seed=0)
+    pred = composite_signals_clusterer(corpus)
+
+    truth = corpus.truth_labels(level="campaign")
+    truth_counts = Counter(truth.values())
+    pred_counts = Counter(pred.values())
+
+    true_singletons = [aid for aid, t in truth.items() if truth_counts[t] == 1]
+    # Truth-singletons in this composite:
+    #   1 lone wolf + 18 noise + 2 shared_wordlist actors (each
+    #   campaign has one actor; campaign size 1 means truth-singleton)
+    #   = 21.
+    assert len(true_singletons) == 21, (
+        f"expected 21 truth-singletons, got {len(true_singletons)}"
+    )
+    absorbed = [aid for aid in true_singletons if pred_counts[pred[aid]] != 1]
+    assert not absorbed, (
+        f"composite clusterer absorbed {len(absorbed)} singletons into "
+        f"larger clusters: {absorbed[:5]}…"
+    )
+
+    metrics = score(truth, pred)
+    assert metrics["singleton_recall"] == pytest.approx(1.0)
diff --git a/tests/fixtures/campaigns/noise_floor.expected.yaml b/tests/fixtures/campaigns/noise_floor.expected.yaml
new file mode 100644
index 00000000..568786e5
--- /dev/null
+++ b/tests/fixtures/campaigns/noise_floor.expected.yaml
@@ -0,0 +1,24 @@
+# Bounds for fixture 6 (noise_floor).
+#
+# Composite corpus: 12 campaign-driven attackers (across 5 prior
+# fixtures' actors / rotations) + 18 truth-singleton noise rows
+# (8 inherited from lone_wolf + 10 added by this fixture).
+#
+# A correct algorithm groups every campaign correctly and leaves
+# every singleton singleton — score 1.0 across every metric.
+#
+# Singleton recall is the load-bearing metric here: noise
+# absorption is the failure mode that makes campaign attribution
+# useless in practice (a clusterer that pulls noise into real
+# campaigns dilutes attribution to nothing). The bound floor on
+# singleton_recall is what would catch that regression.
+#
+# Bounds are loose at v1; tighten as the algorithm matures.
+adjusted_rand_index:
+  min: 0.85
+homogeneity:
+  min: 0.90
+completeness:
+  min: 0.80
+singleton_recall:
+  min: 0.95
diff --git a/tests/fixtures/campaigns/noise_floor.yaml b/tests/fixtures/campaigns/noise_floor.yaml
new file mode 100644
index 00000000..66fbbeaa
--- /dev/null
+++ b/tests/fixtures/campaigns/noise_floor.yaml
@@ -0,0 +1,34 @@
+# Fixture 6 (noise_floor) — see development/CAMPAIGN_CLUSTERING.md §2.
+#
+# Composite of all five prior fixtures plus N additional Delivery-only
+# noise scanners. Tests that the clusterer holds every campaign
+# together AND keeps every noise observation as its own singleton
+# in the presence of cross-fixture interference.
+#
+# This is a CONTROL fixture: if any of the five constituent fixtures
+# regresses inside the combined corpus but passes individually, the
+# regression is in cross-corpus interaction (signal collisions,
+# clusterer ambiguity, factory ID re-use, …). Catches a class of bugs
+# that single-corpus fixtures cannot.
+#
+# Composition is expressed by listing the constituent fixtures here;
+# the test file's loader expands this into a full `corpus.campaigns`
+# spec at load time so the factory itself stays unaware. This format
+# is fixture-6-specific — no other fixture uses ``include_fixtures``.
+#
+# Pass condition: ``composite_signals_clusterer`` (fingerprint OR C2
+# union-find — see fixture_harness.py) resolves every campaign and
+# leaves every noise scanner singleton. The bounds in
+# ``noise_floor.expected.yaml`` are global across the merged corpus.
+#
+# No standalone adversarial test for fixture 6 — each constituent
+# fixture already ships its own. What this fixture catches is
+# CROSS-fixture interference, which is a different failure mode from
+# the in-fixture adversarial cases.
+include_fixtures:
+  - shared_wordlist.yaml
+  - vpn_hopping.yaml
+  - lone_wolf.yaml
+  - paused_campaign.yaml
+  - multi_operator.yaml
+extra_noise_scanners: 10