test(clustering): fixture 1 (shared_wordlist) + fixture-harness extraction

Two campaigns sharing a credential wordlist; everything else (ASN, IPs, JA3, HASSH, active hours) divergent. Pass condition: clusterer must NOT merge. Protects against the "credential overlap is identity" failure mode that commodity wordlists invite. * tests/clustering/fixture_harness.py — shared assert_fixture_bounds helper + identity_clusterer (placeholder, trivially correct on all-singleton fixtures) + credential_jaccard_clusterer (deliberately-bad reference used to PROVE the fixture catches what it should). * tests/clustering/test_shared_wordlist_fixture.py — bounds pass with identity, bounds FAIL (homogeneity → 0) with the bad credential clusterer. The latter is the proof the fixture earns its keep. * tests/fixtures/campaigns/shared_wordlist.{yaml,expected.yaml}. * tests/clustering/test_lone_wolf_fixture.py — refactored onto the shared harness. No behavior change.
2026-04-26 06:38:17 -04:00
parent 00254629f8
commit e80f3eec54
5 changed files with 363 additions and 36 deletions
--- a/tests/clustering/test_lone_wolf_fixture.py
+++ b/tests/clustering/test_lone_wolf_fixture.py
@@ -1,50 +1,37 @@
 """
 End-to-end pipeline test for fixture 3 (lone_wolf).

-Loads the YAML spec, runs the synthetic generator, applies a placeholder
-identity clusterer (each attacker → its own cluster), scores against
-the expected bounds. This is the simplest of the six fixtures and is
-deliberately the first one wired up — its ground truth is all
-singletons, so an identity clusterer trivially passes, which proves the
-DSL→factory→metrics pipeline works before any real algorithm is built.
+Loads the YAML spec, runs the synthetic generator, applies the
+identity-clusterer placeholder (each attacker → its own cluster), and
+scores against the expected bounds. This is the simplest of the six
+fixtures and is deliberately the first one wired up — its ground truth
+is all singletons, so an identity clusterer trivially passes, which
+proves the DSL → factory → metrics pipeline works before any real
+algorithm is built.

 Once the connected-components clusterer (CAMPAIGN_CLUSTERING.md §4)
-lands, this test will swap the placeholder for the real implementation
-and the same fixture must continue to pass.
+lands, the same fixture must continue to pass.
 """
 from __future__ import annotations

 from pathlib import Path

 import pytest
-import yaml

+from tests.clustering.fixture_harness import (
+    assert_fixture_bounds,
+    identity_clusterer,
+)
 from tests.clustering.metrics import score
-from tests.factories.campaign_factory import GeneratedCorpus, generate, load_yaml
+from tests.factories.campaign_factory import generate, load_yaml

 FIXTURE_DIR = Path(__file__).parent.parent / "fixtures" / "campaigns"


-def _identity_clusterer(corpus: GeneratedCorpus) -> dict[str, str]:
-    """Every attacker is its own cluster. Trivially correct on lone_wolf."""
-    return {a.attacker_id: f"cluster-{a.attacker_id}" for a in corpus.attackers}
-
-
 def test_lone_wolf_pipeline_passes_bounds() -> None:
    spec = load_yaml(FIXTURE_DIR / "lone_wolf.yaml")
-    bounds = yaml.safe_load((FIXTURE_DIR / "lone_wolf.expected.yaml").read_text())
-
    corpus = generate(spec, seed=0)
-    truth = corpus.truth_labels()
-    pred = _identity_clusterer(corpus)
-    metrics = score(truth, pred)
-
-    failures = []
-    for name, bound in bounds.items():
-        observed = metrics[name]
-        if observed < bound["min"]:
-            failures.append(f"{name}={observed:.3f} < min {bound['min']:.3f}")
-    assert not failures, "fixture bounds violated: " + "; ".join(failures)
+    assert_fixture_bounds(corpus, identity_clusterer, FIXTURE_DIR / "lone_wolf.expected.yaml")


 def test_lone_wolf_corpus_shape() -> None:
@@ -53,7 +40,6 @@ def test_lone_wolf_corpus_shape() -> None:
    corpus = generate(spec, seed=0)
    assert len(corpus.attackers) == 9
    assert len(corpus.sessions) == 9
-    # Every attacker is a truth-singleton (its own campaign).
    truth_campaigns = {a.truth_campaign_id for a in corpus.attackers}
    assert len(truth_campaigns) == 9

@@ -64,7 +50,7 @@ def test_identity_clusterer_fails_on_a_real_campaign() -> None:
    multi-actor campaign should make the placeholder identity clusterer
    fail completeness, since each truth-campaign gets fragmented into
    one-member clusters. If this didn't fail, our metrics would be
-    blind to false splits — and that's the entire point of fixture 4
+    blind to false splits — and that's the entire point of fixtures 4
    and 5 in the design doc.
    """
    spec = {
@@ -82,11 +68,7 @@ def test_identity_clusterer_fails_on_a_real_campaign() -> None:
        }
    }
    corpus = generate(spec, seed=0)
-    truth = corpus.truth_labels()
-    pred = _identity_clusterer(corpus)
-    metrics = score(truth, pred)
-    # Identity clusterer splits the one true campaign across 2 clusters
-    # → completeness drops below 1.0. This must hold or our metrics
-    # aren't catching what they're supposed to catch.
+    pred = identity_clusterer(corpus)
+    metrics = score(corpus.truth_labels(), pred)
    assert metrics["completeness"] < 1.0
-    assert metrics["homogeneity"] == pytest.approx(1.0)  # no false merges, just splits
+    assert metrics["homogeneity"] == pytest.approx(1.0)