merge: testing → main (reconcile 2-week divergence)

2026-04-28 18:36:00 -04:00
parent 499836c9e4
commit 862e4dbb31
1235 changed files with 160255 additions and 7996 deletions
--- a/tests/clustering/init.py
+++ b/tests/clustering/init.py
--- a/tests/clustering/fixture_harness.py
+++ b/tests/clustering/fixture_harness.py
@@ -0,0 +1,488 @@
+"""
+Shared helpers for fixture-driven clustering tests.
+
+Each fixture lives at `tests/fixtures/campaigns/<name>.yaml` with a paired
+`<name>.expected.yaml` bounds file. The harness here keeps every per-
+fixture test file down to "load corpus → predict → assert bounds" without
+copy-pasting the bound-walk loop or reference clusterers across files.
+
+Reference clusterers are provided as the algorithm under test in each
+fixture's bound assertions; their names describe the *signal* they
+cluster on, not the quality of the result.
+
+* `identity_clusterer` — every attacker is its own cluster. Trivially
+  passes any fixture whose ground truth is all singletons (lone_wolf,
+  shared_wordlist before merge, etc). Useful as a green baseline while
+  the real connected-components algorithm is under construction.
+
+* `fingerprint_clusterer` — groups attackers by ``(ja3, hassh)``.
+  Approximates the "stable signals an attacker can't cheaply rotate"
+  arm of the planned similarity graph (see IDENTITY_RESOLUTION.md
+  Premise). Folds rotated-IP observations of one actor into one
+  cluster when the actor's JA3 + HASSH stay stable. Attackers whose
+  fingerprints are both NULL (typical of un-fingerprinted noise
+  scanners) are treated as un-mergeable — each becomes its own
+  singleton — so this clusterer doesn't trivially fuse all noise
+  into one mega-cluster.
+
+* `credential_jaccard_clusterer` — deliberately-bad reference that
+  merges any two attackers whose credential-attempt sets overlap above
+  a threshold. Exists so fixtures like `shared_wordlist` can prove
+  they fail a clusterer that relies on credential overlap alone — the
+  whole point of fixture #1.
+
+* `asn_clusterer` — deliberately-bad reference that groups attackers
+  by source ASN. Exists so fixtures like `vpn_hopping` (fixture #2)
+  can prove they fail a clusterer that treats ASN match as a
+  high-weight signal — VPN/proxy hopping shatters ASN within a single
+  identity and a clusterer that leans on it tanks completeness.
+
+* `time_window_clusterer` — deliberately-bad reference that unions
+  attackers whose session time-ranges are within ``gap_days`` of each
+  other. Exists so fixtures like `paused_campaign` (fixture #4) can
+  prove they fail a clusterer that treats short-window time proximity
+  as a primary signal — operators pause, sleep, take weekends.
+
+* `c2_callback_clusterer` — union-find on overlapping C2 callback
+  sets. Pass-clusterer for fixture 5 (multi_operator), where two
+  operators with distinct tooling share a C2 endpoint as the
+  load-bearing campaign signal. Attackers with no C2 endpoints
+  become their own singleton.
+
+* `shift_clusterer` — deliberately-bad reference that buckets
+  attackers by majority session-start hour into night/day/swing.
+  Exists so fixture 5 can prove it fails a clusterer that treats
+  shift schedule as a primary signal — operators on different
+  schedules can still share a campaign.
+
+* `composite_signals_clusterer` — union-find that combines
+  ``(ja3, hassh)`` match OR shared C2 callback into the same
+  cluster. Approximates the planned similarity graph well enough
+  to score the combined-corpus fixture (fixture 6, noise_floor).
+
+* `recency_decay_clusterer` — deliberately-bad reference that
+  starts from the same composite signal graph but weights each
+  edge by ``exp(-time_distance / half_life_days)`` and drops
+  edges below a threshold. Adversarial reference for fixture 7
+  (slow_burn): the canonical production failure mode where a
+  graph clusterer with recency decay fragments long-running
+  APT campaigns by silently expiring multi-week-old edges.
+"""
+from __future__ import annotations
+
+from collections.abc import Callable
+from pathlib import Path
+
+import yaml
+
+from tests.clustering.metrics import score
+from tests.factories.campaign_factory import GeneratedCorpus
+
+PredictFn = Callable[[GeneratedCorpus], dict[str, str]]
+
+
+def assert_fixture_bounds(
+    corpus: GeneratedCorpus,
+    predict: PredictFn,
+    expected_path: str | Path,
+    *,
+    truth_level: str = "campaign",
+) -> dict[str, float]:
+    """
+    Run `predict` against the corpus, score against ground truth, and
+    assert every metric meets the floor declared in `expected_path`.
+
+    The bounds file is a YAML mapping of metric name to a mapping with
+    a ``min`` key (e.g. ``homogeneity: {min: 0.9}``); every metric name
+    must be a key of the dict returned by ``score``.
+
+    ``truth_level`` selects the oracle: ``"campaign"`` (default) for
+    campaign-clustering fixtures, ``"identity"`` for identity-resolution
+    fixtures (where the clusterer's job is to fold N rotated-IP
+    observations into one identity), or ``"actor"`` for completeness.
+
+    Returns the observed metrics dict so callers can do additional
+    assertions (e.g. "homogeneity is *exactly* 1.0 for this fixture").
+
+    Raises ``ValueError`` when the bounds file does not parse to a
+    mapping (an empty file parses to ``None``), ``KeyError`` when it
+    names a metric that ``score`` does not produce, and
+    ``AssertionError`` listing every violated floor.
+    """
+    bounds = yaml.safe_load(Path(expected_path).read_text(encoding="utf-8"))
+    if not isinstance(bounds, dict):
+        # An empty or malformed bounds file would otherwise surface as an
+        # opaque AttributeError on `.items()` below.
+        raise ValueError(
+            f"{expected_path}: expected a mapping of metric name to "
+            f"{{min: ...}}, got {type(bounds).__name__}"
+        )
+    truth = corpus.truth_labels(level=truth_level)
+    pred = predict(corpus)
+    metrics = score(truth, pred)
+
+    # A typo in the bounds file must fail loudly and name the culprit.
+    unknown = [name for name in bounds if name not in metrics]
+    if unknown:
+        raise KeyError(
+            f"{expected_path}: unknown metric(s) {unknown}; "
+            f"known metrics: {sorted(metrics)}"
+        )
+
+    failures = []
+    for name, bound in bounds.items():
+        observed = metrics[name]
+        floor = bound["min"]
+        if observed < floor:
+            failures.append(f"{name}={observed:.3f} < min {floor:.3f}")
+    assert not failures, (
+        "fixture bounds violated: " + "; ".join(failures)
+        + f" (full metrics: {metrics})"
+    )
+    return metrics
+
+
+# ─── Reference clusterers ───────────────────────────────────────────────────
+
+
+def identity_clusterer(corpus: GeneratedCorpus) -> dict[str, str]:
+    """Assign each attacker a private cluster named after its own id.
+
+    Placeholder until §4 algorithm lands.
+    """
+    pred: dict[str, str] = {}
+    for att in corpus.attackers:
+        pred[att.attacker_id] = f"cluster-{att.attacker_id}"
+    return pred
+
+
+def fingerprint_clusterer(corpus: GeneratedCorpus) -> dict[str, str]:
+    """Cluster attackers on their ``(ja3, hassh)`` pair.
+
+    An attacker with at least one of the two fingerprints is labelled by
+    the pair, so rows with equal pairs share a cluster. An attacker with
+    both fingerprints ``None`` has nothing to match on and receives a
+    private singleton label derived from its id.
+    """
+
+    def label_for(att) -> str:
+        has_fingerprint = att.ja3 is not None or att.hassh is not None
+        if has_fingerprint:
+            return f"fp::{att.ja3}::{att.hassh}"
+        # No fingerprint to share — un-mergeable, own cluster.
+        return f"fp-singleton-{att.attacker_id}"
+
+    return {att.attacker_id: label_for(att) for att in corpus.attackers}
+
+
+def asn_clusterer(corpus: GeneratedCorpus) -> dict[str, str]:
+    """Label every attacker with its source ASN, so equal ASNs share a
+    cluster. Deliberately-bad — see fixture 2."""
+    pred: dict[str, str] = {}
+    for att in corpus.attackers:
+        pred[att.attacker_id] = f"asn-{att.asn}"
+    return pred
+
+
+def _union_find(ids: list[str]) -> tuple[
+    dict[str, str], Callable[[str], str], Callable[[str, str], None]
+]:
+    """Build a fresh disjoint-set over ``ids``.
+
+    Returns ``(parent, find, union)``: the parent map (every id starts
+    as its own root), a ``find`` that returns the root of an id while
+    shortening the path it walks, and a ``union`` that attaches the
+    root of its first argument beneath the root of its second.
+    """
+    parent: dict[str, str] = {node: node for node in ids}
+
+    def find(x: str) -> str:
+        while True:
+            up = parent[x]
+            if up == x:
+                return x
+            # Point x at its grandparent, then continue from there.
+            parent[x] = parent[up]
+            x = parent[x]
+
+    def union(x: str, y: str) -> None:
+        root_x = find(x)
+        root_y = find(y)
+        if root_x == root_y:
+            return
+        parent[root_x] = root_y
+
+    return parent, find, union
+
+
+def c2_callback_clusterer(corpus: GeneratedCorpus) -> dict[str, str]:
+    """Merge attackers that share at least one C2 callback.
+
+    Each attacker's callback set is collected from the truthy
+    ``c2_callback`` values of its sessions. Any two attackers whose sets
+    intersect are unioned. An attacker with an empty set is never
+    linked and is labelled as its own ``c2-none-<id>`` singleton.
+    """
+    callbacks: dict[str, set[str]] = {}
+    for att in corpus.attackers:
+        seen: set[str] = set()
+        for sess in att.sessions:
+            if sess.c2_callback:
+                seen.add(sess.c2_callback)
+        callbacks[att.attacker_id] = seen
+
+    ids = list(callbacks.keys())
+    _parent, find, union = _union_find(ids)
+
+    # Only attackers that actually have callbacks can form edges.
+    with_c2 = [aid for aid in ids if callbacks[aid]]
+    for pos, left in enumerate(with_c2):
+        left_set = callbacks[left]
+        for right in with_c2[pos + 1 :]:
+            if not left_set.isdisjoint(callbacks[right]):
+                union(left, right)
+
+    pred: dict[str, str] = {}
+    for aid in ids:
+        if callbacks[aid]:
+            pred[aid] = f"c2-{find(aid)}"
+        else:
+            pred[aid] = f"c2-none-{aid}"
+    return pred
+
+
+def shift_clusterer(corpus: GeneratedCorpus) -> dict[str, str]:
+    """Label each attacker with the shift its sessions most often start
+    in. Deliberately-bad — see fixture 5.
+
+    Shifts by session-start hour:
+      * night  — 22, 23, 0, 1, 2, 3, 4, 5
+      * day    — 6 through 13
+      * swing  — every other hour (14 through 21)
+
+    When two shifts tie, the one encountered first among the attacker's
+    sessions wins. Attackers with no sessions become their own
+    singleton.
+    """
+    shift_of_hour: dict[int, str] = {}
+    for hour in (22, 23, 0, 1, 2, 3, 4, 5):
+        shift_of_hour[hour] = "night"
+    for hour in (6, 7, 8, 9, 10, 11, 12, 13):
+        shift_of_hour[hour] = "day"
+
+    pred: dict[str, str] = {}
+    for att in corpus.attackers:
+        if not att.sessions:
+            pred[att.attacker_id] = f"shift-none-{att.attacker_id}"
+            continue
+        tally: dict[str, int] = {}
+        for sess in att.sessions:
+            # Any hour not listed as night or day is swing.
+            shift = shift_of_hour.get(sess.started_at.hour, "swing")
+            tally[shift] = tally.get(shift, 0) + 1
+        winner = max(tally, key=tally.get)
+        pred[att.attacker_id] = f"shift-{winner}"
+    return pred
+
+
+def composite_signals_clusterer(corpus: GeneratedCorpus) -> dict[str, str]:
+    """Union-find over two edge kinds: equal ``(ja3, hassh)`` pairs and
+    intersecting C2 callback sets.
+
+    Used as the pass-clusterer for fixture 6, where multiple campaigns
+    and noise are scored together. An attacker counts as fingerprinted
+    when at least one of ``ja3`` / ``hassh`` is not ``None``. Attackers
+    with neither a fingerprint nor any C2 callback are labelled as
+    their own ``composite-singleton-<id>`` cluster.
+    """
+    callbacks: dict[str, set[str]] = {}
+    fingerprint: dict[str, tuple[str | None, str | None] | None] = {}
+    for att in corpus.attackers:
+        aid = att.attacker_id
+        callbacks[aid] = {s.c2_callback for s in att.sessions if s.c2_callback}
+        unfingerprinted = att.ja3 is None and att.hassh is None
+        fingerprint[aid] = None if unfingerprinted else (att.ja3, att.hassh)
+
+    ids = list(callbacks.keys())
+    _parent, find, union = _union_find(ids)
+
+    # Fingerprint edges: bucket ids by fingerprint, then chain each
+    # bucket onto its first member.
+    buckets: dict[tuple[str | None, str | None], list[str]] = {}
+    for aid, fp in fingerprint.items():
+        if fp is not None:
+            buckets.setdefault(fp, []).append(aid)
+    for members in buckets.values():
+        head, *tail = members
+        for member in tail:
+            union(head, member)
+
+    # C2 overlap edges: only attackers with callbacks can participate.
+    with_c2 = [aid for aid in ids if callbacks[aid]]
+    for pos, left in enumerate(with_c2):
+        left_set = callbacks[left]
+        for right in with_c2[pos + 1 :]:
+            if not left_set.isdisjoint(callbacks[right]):
+                union(left, right)
+
+    pred: dict[str, str] = {}
+    for aid in ids:
+        has_signal = fingerprint[aid] is not None or bool(callbacks[aid])
+        if has_signal:
+            pred[aid] = f"composite-{find(aid)}"
+        else:
+            pred[aid] = f"composite-singleton-{aid}"
+    return pred
+
+
+def recency_decay_clusterer(
+    corpus: GeneratedCorpus,
+    *,
+    half_life_days: float = 14.0,
+    threshold: float = 0.5,
+) -> dict[str, str]:
+    """Composite-signal graph whose edges fade with time distance.
+
+    Two attackers are candidates for an edge when their ``(ja3, hassh)``
+    pairs are equal (and not both-``None``) or their C2 callback sets
+    intersect. A candidate edge has weight
+    ``exp(-gap_days / half_life_days)``, where ``gap_days`` is the
+    distance in days between the two attackers' activity midpoints
+    (halfway between earliest session start and latest session end).
+    Only edges with weight ``>= threshold`` are unioned.
+
+    Note that ``half_life_days`` is applied as an e-folding constant:
+    the weight reaches 0.5 after ``half_life_days * ln 2`` days, not
+    after ``half_life_days`` days.
+
+    Deliberately-bad reference for fixture 7 (slow_burn): a campaign
+    that runs for months is fragmented by any clusterer that silently
+    expires old edges.
+
+    Attackers without sessions have no midpoint and never gain edges;
+    attackers with neither fingerprint nor C2 callbacks are labelled
+    ``recency-singleton-<id>``.
+    """
+    import math
+    from datetime import timedelta
+
+    callbacks: dict[str, set[str]] = {}
+    fingerprint: dict[str, tuple[str | None, str | None] | None] = {}
+    midpoint: dict[str, "object | None"] = {}
+    for att in corpus.attackers:
+        aid = att.attacker_id
+        callbacks[aid] = {s.c2_callback for s in att.sessions if s.c2_callback}
+        unfingerprinted = att.ja3 is None and att.hassh is None
+        fingerprint[aid] = None if unfingerprinted else (att.ja3, att.hassh)
+        if not att.sessions:
+            midpoint[aid] = None
+            continue
+        first_start = min(s.started_at for s in att.sessions)
+        last_end = max(
+            s.started_at + timedelta(seconds=s.duration_s) for s in att.sessions
+        )
+        midpoint[aid] = first_start + (last_end - first_start) / 2
+
+    ids = list(callbacks.keys())
+    _parent, find, union = _union_find(ids)
+
+    def has_signal_edge(a: str, b: str) -> bool:
+        """True when a and b match on fingerprint or share a C2 callback."""
+        fp_a = fingerprint[a]
+        if fp_a is not None and fp_a == fingerprint[b]:
+            return True
+        return not callbacks[a].isdisjoint(callbacks[b])
+
+    # Only attackers with a midpoint can be compared in time.
+    timed = [aid for aid in ids if midpoint[aid] is not None]
+    for pos, a in enumerate(timed):
+        mid_a = midpoint[a]
+        for b in timed[pos + 1 :]:
+            if not has_signal_edge(a, b):
+                continue
+            gap_days = abs((mid_a - midpoint[b]).total_seconds()) / 86400.0
+            # Base strength of a matched edge is 1.0, so the decayed
+            # weight is the decay factor itself.
+            weight = math.exp(-gap_days / half_life_days)
+            if weight >= threshold:
+                union(a, b)
+
+    pred: dict[str, str] = {}
+    for aid in ids:
+        if fingerprint[aid] is not None or callbacks[aid]:
+            pred[aid] = f"recency-{find(aid)}"
+        else:
+            pred[aid] = f"recency-singleton-{aid}"
+    return pred
+
+
+def time_window_clusterer(
+    corpus: GeneratedCorpus, *, gap_days: float = 1.0
+) -> dict[str, str]:
+    """Union-find over attackers, edge if their session time-ranges
+    overlap or are within ``gap_days`` of each other.
+
+    An attacker's time-range runs from its earliest session start to
+    its latest session end (``started_at`` plus ``duration_s`` seconds).
+
+    Deliberately-bad reference for fixture 4 (paused_campaign): a
+    campaign that goes silent for several days will be split into
+    "before pause" and "after pause" clusters by this clusterer,
+    breaching completeness. The real algorithm must not lean on
+    short-window time proximity as a primary signal — operators
+    pause, sleep, switch shifts, take weekends. Time bursts are a
+    weak hint, not a hard partition.
+
+    Attackers with no sessions become their own singleton cluster.
+    The returned labels are the union-find root ids themselves.
+    """
+    from datetime import timedelta
+
+    gap = timedelta(days=gap_days)
+    ids = [a.attacker_id for a in corpus.attackers]
+    ranges: dict[str, tuple] = {}
+    for att in corpus.attackers:
+        if not att.sessions:
+            continue
+        starts = [s.started_at for s in att.sessions]
+        ends = [s.started_at + timedelta(seconds=s.duration_s) for s in att.sessions]
+        ranges[att.attacker_id] = (min(starts), max(ends))
+
+    # Shared helper instead of a private copy of the same union-find.
+    _parent, find, union = _union_find(ids)
+
+    keys = list(ranges)
+    for i, a in enumerate(keys):
+        a_start, a_end = ranges[a]
+        for b in keys[i + 1 :]:
+            b_start, b_end = ranges[b]
+            # Time-distance between the two ranges (0 if they overlap).
+            if a_end < b_start:
+                separation = b_start - a_end
+            elif b_end < a_start:
+                separation = a_start - b_end
+            else:
+                separation = timedelta(0)
+            if separation <= gap:
+                union(a, b)
+
+    return {aid: find(aid) for aid in ids}
+
+
+def credential_jaccard_clusterer(
+    corpus: GeneratedCorpus, *, threshold: float = 0.5
+) -> dict[str, str]:
+    """
+    Deliberately-bad reference: union-find over attackers, edge whenever
+    two attackers' credential-attempt sets have Jaccard ≥ threshold.
+
+    Used to demonstrate that fixtures targeting credential-overlap
+    failure modes (fixture 1: shared_wordlist) actually catch a clusterer
+    that leans on credential signals alone. NOT the real algorithm.
+
+    Attackers that tried no credentials never gain an edge and stay in
+    their own cluster. The returned labels are the union-find root ids
+    themselves.
+    """
+    # Build per-attacker credential sets across all of their sessions.
+    creds: dict[str, set[tuple[str, str]]] = {}
+    for att in corpus.attackers:
+        tried: set[tuple[str, str]] = set()
+        for sess in att.sessions:
+            tried.update(sess.credentials_tried)
+        creds[att.attacker_id] = tried
+
+    ids = list(creds)
+    # Shared helper instead of a private copy of the same union-find.
+    _parent, find, union = _union_find(ids)
+
+    for i, a in enumerate(ids):
+        sa = creds[a]
+        if not sa:
+            continue
+        for b in ids[i + 1 :]:
+            sb = creds[b]
+            if not sb:
+                continue
+            # Both sets are non-empty here, so the union cannot be empty
+            # and the division is always defined.
+            jaccard = len(sa & sb) / len(sa | sb)
+            if jaccard >= threshold:
+                union(a, b)
+
+    return {aid: find(aid) for aid in ids}
--- a/tests/clustering/metrics.py
+++ b/tests/clustering/metrics.py
@@ -0,0 +1,179 @@
+"""
+Clustering metric harness — see development/CAMPAIGN_CLUSTERING.md §3.
+
+Decided BEFORE any clustering algorithm exists, on purpose: if the
+metrics get picked after seeing results, they'll flatter whatever the
+algorithm happens to produce.
+
+Four metrics, none on its own sufficient:
+
+  * Adjusted Rand Index — headline number, chance-corrected agreement
+    between predicted clusters and ground truth.
+  * Homogeneity — each predicted cluster contains only one true class.
+    Catches FALSE MERGES (campaigns wrongly fused).
+  * Completeness — every member of a true class lands in the same
+    predicted cluster. Catches FALSE SPLITS (one campaign wrongly torn
+    apart).
+  * Singleton recall — fraction of ground-truth singletons (lone wolves,
+    background noise) that are kept singleton by the clusterer.
+
+Implemented from first principles in pure Python so the test harness
+doesn't pull sklearn/numpy into the runtime dependency surface.
+"""
+from __future__ import annotations
+
+import math
+from collections import Counter, defaultdict
+
+
+def _comb2(n: int) -> int:
+    """Count the unordered pairs that can be drawn from ``n`` items.
+
+    Returns 0 whenever ``n`` is below 2.
+    """
+    if n < 2:
+        return 0
+    return n * (n - 1) // 2
+
+
+def adjusted_rand_index(truth: dict[str, str], pred: dict[str, str]) -> float:
+    """
+    Chance-corrected pair agreement between two labelings of one item set.
+
+    Both arguments map item_id -> cluster_id and must have exactly the
+    same keys; otherwise ``ValueError`` is raised, naming the items
+    missing on each side.
+
+    1.0 means the partitions are identical up to label renaming, values
+    near 0.0 mean chance-level agreement, and worse-than-random
+    labelings can score below zero. Fewer than two items trivially
+    score 1.0.
+    """
+    truth_items = set(truth)
+    pred_items = set(pred)
+    if truth_items != pred_items:
+        raise ValueError(
+            "ARI requires identical item sets in truth and pred "
+            f"(missing in pred: {truth_items - pred_items}, "
+            f"missing in truth: {pred_items - truth_items})"
+        )
+    n = len(truth)
+    if n < 2:
+        return 1.0  # trivially "agree" on <2 items
+
+    # Contingency cells: how many items share a (predicted, true) label pair.
+    cells: dict[tuple[str, str], int] = defaultdict(int)
+    for item, t_label in truth.items():
+        cells[(pred[item], t_label)] += 1
+
+    cluster_sizes = Counter(pred.values())  # row sums (predicted clusters)
+    class_sizes = Counter(truth.values())   # column sums (true classes)
+
+    pairs_together = sum(_comb2(size) for size in cells.values())
+    pairs_in_clusters = sum(_comb2(size) for size in cluster_sizes.values())
+    pairs_in_classes = sum(_comb2(size) for size in class_sizes.values())
+    total_pairs = _comb2(n)
+
+    if total_pairs:
+        expected = (pairs_in_clusters * pairs_in_classes) / total_pairs
+    else:
+        expected = 0.0
+    max_index = (pairs_in_clusters + pairs_in_classes) / 2
+    if max_index == expected:
+        # Degenerate case: this equality only holds when both labelings
+        # are all-singletons or both are one big cluster, so the two
+        # partitions coincide and ARI is taken to be 1.0.
+        return 1.0
+    return (pairs_together - expected) / (max_index - expected)
+
+
+def _entropy(counts: list[int], total: int) -> float:
+    """Shannon entropy (natural log) of the distribution ``counts / total``.
+
+    Zero counts contribute nothing; a ``total`` of 0 yields 0.0.
+    """
+    if total == 0:
+        return 0.0
+    acc = 0.0
+    for count in counts:
+        if count:
+            share = count / total
+            acc -= share * math.log(share)
+    return acc
+
+
+def _conditional_entropy(
+    contingency: dict[tuple[str, str], int],
+    given_counts: dict[str, int],
+    total: int,
+) -> float:
+    """H(rows | cols): entropy of the row label inside each column,
+    averaged over columns with weight ``column_total / total``.
+
+    ``contingency`` is keyed by ``(row, col)``; ``given_counts`` holds
+    the total for every column. Returns 0.0 when ``total`` is 0.
+    """
+    if total == 0:
+        return 0.0
+    # Gather the cell counts belonging to each column.
+    cells_by_col: dict[str, list[int]] = defaultdict(list)
+    for (_row, col), cell in contingency.items():
+        cells_by_col[col].append(cell)
+
+    acc = 0.0
+    for col, cells in cells_by_col.items():
+        col_total = given_counts[col]
+        if col_total != 0:
+            acc += (col_total / total) * _entropy(cells, col_total)
+    return acc
+
+
+def homogeneity(truth: dict[str, str], pred: dict[str, str]) -> float:
+    """
+    1 - H(truth | pred) / H(truth). A score of 1.0 means no predicted
+    cluster mixes members of different true classes (no false merges).
+
+    Returns 1.0 for an empty ``truth`` and when the truth labeling has
+    zero entropy.
+    """
+    n = len(truth)
+    if n == 0:
+        return 1.0
+    # Cells keyed (true class, predicted cluster).
+    cells: dict[tuple[str, str], int] = defaultdict(int)
+    for item, true_label in truth.items():
+        cells[(true_label, pred[item])] += 1
+    class_sizes = Counter(truth.values())
+    cluster_sizes = Counter(pred.values())
+    h_truth = _entropy(list(class_sizes.values()), n)
+    if h_truth == 0:
+        return 1.0
+    h_truth_given_pred = _conditional_entropy(cells, dict(cluster_sizes), n)
+    ratio = h_truth_given_pred / h_truth
+    return 1.0 - ratio
+
+
+def completeness(truth: dict[str, str], pred: dict[str, str]) -> float:
+    """
+    1 - H(pred | truth) / H(pred). A score of 1.0 means every true class
+    sits entirely inside one predicted cluster (no false splits).
+
+    Returns 1.0 for an empty ``truth`` and when the predicted labeling
+    has zero entropy.
+    """
+    n = len(truth)
+    if n == 0:
+        return 1.0
+    # Cells keyed (predicted cluster, true class).
+    cells: dict[tuple[str, str], int] = defaultdict(int)
+    for item, true_label in truth.items():
+        cells[(pred[item], true_label)] += 1
+    cluster_sizes = Counter(pred.values())
+    class_sizes = Counter(truth.values())
+    h_pred = _entropy(list(cluster_sizes.values()), n)
+    if h_pred == 0:
+        return 1.0
+    h_pred_given_truth = _conditional_entropy(cells, dict(class_sizes), n)
+    ratio = h_pred_given_truth / h_pred
+    return 1.0 - ratio
+
+
+def singleton_recall(truth: dict[str, str], pred: dict[str, str]) -> float:
+    """
+    Share of ground-truth singletons that remain alone in the prediction.
+
+    A "true singleton" is an item whose truth label is carried by no
+    other item (lone wolves, background noise scanners). It counts as
+    kept when its predicted label is likewise carried by no other item.
+    The metric exists because ARI/homogeneity/completeness all dilute
+    the cost of a clusterer that absorbs noise into real campaigns.
+
+    Returns 1.0 when the truth contains no singletons.
+    """
+    class_sizes = Counter(truth.values())
+    lone_items = [
+        item for item, label in truth.items() if class_sizes[label] == 1
+    ]
+    if not lone_items:
+        return 1.0
+    cluster_sizes = Counter(pred.values())
+    kept = 0
+    for item in lone_items:
+        if cluster_sizes[pred[item]] == 1:
+            kept += 1
+    return kept / len(lone_items)
+
+
+def score(truth: dict[str, str], pred: dict[str, str]) -> dict[str, float]:
+    """Compute all four clustering metrics and return them keyed by name."""
+    report: dict[str, float] = {}
+    report["adjusted_rand_index"] = adjusted_rand_index(truth, pred)
+    report["homogeneity"] = homogeneity(truth, pred)
+    report["completeness"] = completeness(truth, pred)
+    report["singleton_recall"] = singleton_recall(truth, pred)
+    return report
--- a/tests/clustering/test_campaign_factory.py
+++ b/tests/clustering/test_campaign_factory.py
@@ -0,0 +1,318 @@
+"""Determinism + DSL-validation tests for the synthetic campaign factory."""
+from __future__ import annotations
+
+import pytest
+
+from decnet.clustering.ukc import UKCPhase
+from tests.factories.campaign_factory import (
+    DSLValidationError,
+    generate,
+)
+
+
+def _minimal_spec() -> dict:
+    """Return a fresh spec for a one-actor, one-phase, one-day campaign."""
+    actor = {"id": "a-1", "asn": 64512}
+    phase = {"name": "delivery", "actor": "a-1"}
+    campaign = {
+        "id": "c-test",
+        "actors": [actor],
+        "phases": [phase],
+        "duration_days": 1,
+    }
+    return {"campaign": campaign}
+
+
+def test_generation_is_deterministic_given_seed() -> None:
+    """Two runs with the same seed must yield the same attacker and session IDs."""
+    spec = _minimal_spec()
+    first = generate(spec, seed=42)
+    second = generate(spec, seed=42)
+    # IDs come from the RNG, so equal seeds must give equal IDs, not
+    # just equal structure. Otherwise federation gossip and fixture
+    # diffing both break.
+    first_attacker_ids = [att.attacker_id for att in first.attackers]
+    second_attacker_ids = [att.attacker_id for att in second.attackers]
+    assert first_attacker_ids == second_attacker_ids
+    first_session_ids = [s.session_id for s in first.sessions]
+    second_session_ids = [s.session_id for s in second.sessions]
+    assert first_session_ids == second_session_ids
+
+
+def test_different_seeds_produce_different_ids() -> None:
+    """Seeds 1 and 2 must give the first attacker different IDs."""
+    spec = _minimal_spec()
+    from_seed_1 = generate(spec, seed=1)
+    from_seed_2 = generate(spec, seed=2)
+    id_1 = from_seed_1.attackers[0].attacker_id
+    id_2 = from_seed_2.attackers[0].attacker_id
+    assert id_1 != id_2
+
+
+def test_truth_labels_match_dsl() -> None:
+    """Truth campaign/actor IDs on the generated row echo the spec."""
+    corpus = generate(_minimal_spec(), seed=0)
+    first = corpus.attackers[0]
+    assert first.truth_campaign_id == "c-test"
+    assert first.truth_actor_id == "a-1"
+    # truth_labels() is the mapping the metric harness consumes.
+    labels = corpus.truth_labels()
+    assert labels[first.attacker_id] == "c-test"
+
+
+def test_unobservable_phase_emits_no_events() -> None:
+    """A reconnaissance phase must add no sessions; only delivery does."""
+    recon = {"name": "reconnaissance", "actor": "a-1"}  # pre-target, unobservable
+    delivery = {"name": "delivery", "actor": "a-1"}
+    spec = _minimal_spec()
+    spec["campaign"]["phases"] = [recon, delivery]
+    corpus = generate(spec, seed=0)
+    # Every session must belong to the delivery phase, and there is one.
+    for sess in corpus.sessions:
+        assert sess.phase == UKCPhase.DELIVERY
+    assert len(corpus.sessions) == 1
+
+
+def test_unknown_phase_name_raises() -> None:
+    """A phase name outside the UKC vocabulary is a DSL validation error."""
+    bogus_phase = {"name": "make_coffee", "actor": "a-1"}
+    spec = _minimal_spec()
+    spec["campaign"]["phases"] = [bogus_phase]
+    with pytest.raises(DSLValidationError, match="unknown UKC phase"):
+        generate(spec, seed=0)
+
+
+def test_phase_referencing_unknown_actor_raises() -> None:
+    """A phase naming an actor absent from the spec is a DSL validation error."""
+    orphan_phase = {"name": "delivery", "actor": "ghost"}
+    spec = _minimal_spec()
+    spec["campaign"]["phases"] = [orphan_phase]
+    with pytest.raises(DSLValidationError, match="unknown actor"):
+        generate(spec, seed=0)
+
+
+def test_noise_scanners_are_truth_singletons() -> None:
+    """Each noise scanner must carry its own distinct truth campaign ID."""
+    corpus_spec = {
+        "campaigns": [_minimal_spec()],
+        "noise": {"scanner_count": 5},
+    }
+    corpus = generate({"corpus": corpus_spec}, seed=0)
+    # 1 campaign actor + 5 noise scanners = 6 distinct truth campaigns.
+    distinct_campaigns = set()
+    for att in corpus.attackers:
+        distinct_campaigns.add(att.truth_campaign_id)
+    assert len(distinct_campaigns) == 6
+
+
+def test_multi_actor_campaign_shares_campaign_id() -> None:
+    """Rows from two actors in one campaign share a single truth campaign ID."""
+    actors = [
+        {"id": "a-1", "asn": 14061},
+        {"id": "a-2", "asn": 14061},
+    ]
+    phases = [
+        {"name": "delivery", "actor": "a-1"},
+        {"name": "discovery", "actor": "a-2"},
+    ]
+    spec = {
+        "campaign": {
+            "id": "c-shared",
+            "actors": actors,
+            "phases": phases,
+            "duration_days": 1,
+        }
+    }
+    corpus = generate(spec, seed=0)
+    # Both attacker rows must map to the SAME truth_campaign_id; this is
+    # the property fixture 5 (multi_operator) hinges on.
+    campaign_ids = set(corpus.truth_labels().values())
+    assert campaign_ids == {"c-shared"}
+
+
+# ─── ip_pool: rotating — identity-resolution fixture support ────────────────
+
+
+def test_rotating_ip_pool_emits_one_row_per_rotation_count() -> None:
+    """
+    A rotating actor with ``rotation_count: 5`` must yield five
+    attacker rows for that single DSL actor.
+    """
+    rotating_actor = {
+        "id": "a-1",
+        "asn": 14061,
+        "ip_pool": "rotating",
+        "rotation_count": 5,
+        "ja3": "JA3-fixed",
+        "hassh": "HASSH-fixed",
+    }
+    delivery = {
+        "name": "delivery",
+        "actor": "a-1",
+        "target_selector": {"count": 10},
+    }
+    spec = {
+        "campaign": {
+            "id": "c-rotating",
+            "actors": [rotating_actor],
+            "phases": [delivery],
+            "duration_days": 1,
+        }
+    }
+    corpus = generate(spec, seed=0)
+    row_count = len(corpus.attackers)
+    assert row_count == 5
+
+
+def test_rotating_rows_share_identity_and_fingerprints_but_differ_on_ip() -> None:
+    """
+    Rotated rows must agree on truth_identity_id, truth_actor_id,
+    truth_campaign_id, ja3 and hassh — the stable signals a clusterer
+    can use to recover identity — while each row has its own ip.
+    """
+    rotating_actor = {
+        "id": "a-1",
+        "asn": 14061,
+        "ip_pool": "rotating",
+        "rotation_count": 5,
+        "ja3": "JA3-fixed",
+        "hassh": "HASSH-fixed",
+    }
+    delivery = {
+        "name": "delivery",
+        "actor": "a-1",
+        "target_selector": {"count": 5},
+    }
+    spec = {
+        "campaign": {
+            "id": "c-vpn-hop",
+            "actors": [rotating_actor],
+            "phases": [delivery],
+            "duration_days": 1,
+        }
+    }
+    rows = generate(spec, seed=0).attackers
+    # Stable: one shared value across all 5 rows.
+    stable_attrs = (
+        "truth_identity_id",
+        "truth_actor_id",
+        "truth_campaign_id",
+        "ja3",
+        "hassh",
+    )
+    for attr in stable_attrs:
+        assert len({getattr(row, attr) for row in rows}) == 1
+    # Rotating: 5 distinct IPs.
+    assert len({row.ip for row in rows}) == 5
+
+
+def test_rotation_asns_distributed_across_rows() -> None:
+    """
+    With ``rotation_asns`` as long as ``rotation_count``, the rotated
+    rows must carry those ASNs in the listed order.
+    """
+    listed_asns = [14061, 7922, 16509, 14618, 13335]
+    rotating_actor = {
+        "id": "a-1",
+        "asn": 14061,  # primary, ignored when rotation_asns is set
+        "ip_pool": "rotating",
+        "rotation_count": 5,
+        "rotation_asns": listed_asns,
+        "ja3": "x",
+        "hassh": "y",
+    }
+    delivery = {
+        "name": "delivery",
+        "actor": "a-1",
+        "target_selector": {"count": 5},
+    }
+    spec = {
+        "campaign": {
+            "id": "c-multi-asn",
+            "actors": [rotating_actor],
+            "phases": [delivery],
+            "duration_days": 1,
+        }
+    }
+    corpus = generate(spec, seed=0)
+    observed_asns = [row.asn for row in corpus.attackers]
+    assert observed_asns == [14061, 7922, 16509, 14618, 13335]
+
+
+def test_rotation_asns_cycle_when_shorter_than_count() -> None:
+    """rotation_asns of length 2 with rotation_count=5 cycles."""
+    spec = {
+        "campaign": {
+            "id": "c-cycle",
+            "actors": [{
+                "id": "a-1",
+                "ip_pool": "rotating",
+                "rotation_count": 5,
+                "rotation_asns": [100, 200],
+                "ja3": "x", "hassh": "y",
+            }],
+            "phases": [{"name": "delivery", "actor": "a-1"}],
+            "duration_days": 1,
+        }
+    }
+    corpus = generate(spec, seed=0)
+    assert [r.asn for r in corpus.attackers] == [100, 200, 100, 200, 100]
+
+
+def test_sessions_distribute_round_robin_across_rotated_rows() -> None:
+    """
+    With rotation_count=3 and 9 sessions in a phase, each row should
+    receive 3 sessions (round-robin). This is what makes the clusterer
+    job realistic — every observation row carries its own session
+    timeline that the clusterer joins via shared fingerprints.
+    """
+    spec = {
+        "campaign": {
+            "id": "c-rr",
+            "actors": [{
+                "id": "a-1",
+                "ip_pool": "rotating",
+                "rotation_count": 3,
+                "ja3": "x", "hassh": "y",
+            }],
+            "phases": [{"name": "delivery", "actor": "a-1",
+                        "target_selector": {"count": 9}}],
+            "duration_days": 1,
+        }
+    }
+    corpus = generate(spec, seed=0)
+    counts = sorted(len(r.sessions) for r in corpus.attackers)
+    assert counts == [3, 3, 3]
+
+
+def test_truth_labels_at_identity_level() -> None:
+    """
+    corpus.truth_labels(level="identity") returns the identity-level
+    oracle the clusterer is scored against. Rotated rows for one DSL
+    actor share an identity label even though they have distinct
+    attacker_ids.
+    """
+    spec = {
+        "campaign": {
+            "id": "c-rot",
+            "actors": [{
+                "id": "a-1",
+                "ip_pool": "rotating",
+                "rotation_count": 4,
+                "ja3": "x", "hassh": "y",
+            }],
+            "phases": [{"name": "delivery", "actor": "a-1",
+                        "target_selector": {"count": 4}}],
+            "duration_days": 1,
+        }
+    }
+    corpus = generate(spec, seed=0)
+    identity_labels = corpus.truth_labels(level="identity")
+    assert len(identity_labels) == 4  # one per attacker row
+    # All 4 attackers share one identity label.
+    assert len(set(identity_labels.values())) == 1
+
+
+def test_truth_labels_unknown_level_raises() -> None:
+    spec = _minimal_spec()
+    corpus = generate(spec, seed=0)
+    with pytest.raises(ValueError, match="unknown truth-label level"):
+        corpus.truth_labels(level="campaign-but-spelled-wrong")
+
+
+def test_sticky_default_unchanged_back_compat() -> None:
+    """
+    The pre-existing sticky-default path produces exactly one row per
+    actor and assigns truth_identity_id. Smoke-tests that the
+    refactor didn't break the back-compat case.
+    """
+    corpus = generate(_minimal_spec(), seed=0)
+    assert len(corpus.attackers) == 1
+    assert corpus.attackers[0].truth_identity_id != ""
+    # Default truth_labels still returns campaign labels.
+    labels = corpus.truth_labels()
+    assert set(labels.values()) == {"c-test"}
+
+
+def test_rotated_sessions_carry_identity_label() -> None:
+    """SyntheticSession.truth_identity_id matches its parent attacker."""
+    spec = {
+        "campaign": {
+            "id": "c-rot",
+            "actors": [{
+                "id": "a-1",
+                "ip_pool": "rotating",
+                "rotation_count": 3,
+                "ja3": "x", "hassh": "y",
+            }],
+            "phases": [{"name": "delivery", "actor": "a-1",
+                        "target_selector": {"count": 6}}],
+            "duration_days": 1,
+        }
+    }
+    corpus = generate(spec, seed=0)
+    by_id = {a.attacker_id: a for a in corpus.attackers}
+    for sess in corpus.sessions:
+        assert sess.truth_identity_id == by_id[sess.attacker_id].truth_identity_id
--- a/tests/clustering/test_campaign_similarity.py
+++ b/tests/clustering/test_campaign_similarity.py
@@ -0,0 +1,344 @@
+"""Tests for campaign-level similarity primitives.
+
+Covers, in order:
+
+* Each edge family in isolation — phase-handoff, shared-infra,
+  temporal-overlap, cohort.
+* The F7 (slow_burn) time-agnostic invariant — shifting every
+  timestamp on both sides by the same Δ preserves every edge weight.
+* The F1 (shared_wordlist) failure mode — shared cohort alone must
+  NOT push a pair over threshold.
+* The F5 (multi_operator) target — phase-handoff alone (the
+  load-bearing campaign-level signal) DOES cross threshold.
+* Tier-combination arithmetic — shared-infra + temporal overlap
+  (the canonical co-op pattern) crosses threshold; shared-infra +
+  cohort does not.
+"""
+from __future__ import annotations
+
+import pytest
+
+from decnet.clustering.campaign.impl.similarity import (
+    CAMPAIGN_EDGE_THRESHOLD,
+    DEFAULT_HANDOFF_WINDOW_S,
+    IdentityFeatures,
+    cohort_weight,
+    combined_campaign_weight,
+    phase_handoff_weight,
+    shared_infra_weight,
+    temporal_overlap_weight,
+)
+
+
+def _features(uuid: str, **kwargs) -> IdentityFeatures:
+    """Build an ``IdentityFeatures`` for ``uuid``.
+
+    Any extra keyword arguments are forwarded unchanged to the
+    ``IdentityFeatures`` constructor.
+    """
+    return IdentityFeatures(identity_uuid=uuid, **kwargs)
+
+
+# ─── phase_handoff_weight ────────────────────────────────────────────────────
+
+
+def test_phase_handoff_clean_out_to_in_within_window():
+    a = _features(
+        "a",
+        last_phase_per_decky={"d1": "command_and_control"},
+        last_seen_per_decky={"d1": 1000.0},
+    )
+    b = _features(
+        "b",
+        first_phase_per_decky={"d1": "discovery"},
+        first_seen_per_decky={"d1": 1000.0 + 600.0},  # 10 min later
+    )
+    assert phase_handoff_weight(a, b) == 1.0
+
+
+def test_phase_handoff_symmetric():
+    # B finishes, A picks up. The argument order shouldn't matter.
+    b = _features(
+        "b",
+        last_phase_per_decky={"d1": "persistence"},
+        last_seen_per_decky={"d1": 5000.0},
+    )
+    a = _features(
+        "a",
+        first_phase_per_decky={"d1": "lateral_movement"},
+        first_seen_per_decky={"d1": 5000.0 + 60.0},
+    )
+    assert phase_handoff_weight(a, b) == 1.0
+    assert phase_handoff_weight(b, a) == 1.0
+
+
+def test_phase_handoff_no_decky_overlap():
+    a = _features(
+        "a",
+        last_phase_per_decky={"d1": "command_and_control"},
+        last_seen_per_decky={"d1": 1000.0},
+    )
+    b = _features(
+        "b",
+        first_phase_per_decky={"d2": "discovery"},
+        first_seen_per_decky={"d2": 1100.0},
+    )
+    assert phase_handoff_weight(a, b) == 0.0
+
+
+def test_phase_handoff_phase_mismatch():
+    # A ends mid-pivoting (not a handoff-out phase) → no signal.
+    a = _features(
+        "a",
+        last_phase_per_decky={"d1": "exploitation"},
+        last_seen_per_decky={"d1": 1000.0},
+    )
+    b = _features(
+        "b",
+        first_phase_per_decky={"d1": "discovery"},
+        first_seen_per_decky={"d1": 1100.0},
+    )
+    assert phase_handoff_weight(a, b) == 0.0
+
+
+def test_phase_handoff_outside_window():
+    a = _features(
+        "a",
+        last_phase_per_decky={"d1": "command_and_control"},
+        last_seen_per_decky={"d1": 0.0},
+    )
+    b = _features(
+        "b",
+        first_phase_per_decky={"d1": "discovery"},
+        # Way past the 24h default window.
+        first_seen_per_decky={"d1": DEFAULT_HANDOFF_WINDOW_S + 3600.0},
+    )
+    assert phase_handoff_weight(a, b) == 0.0
+
+
+def test_phase_handoff_negative_gap_rejected():
+    # B starts BEFORE A ends — that's overlap, not a handoff.
+    a = _features(
+        "a",
+        last_phase_per_decky={"d1": "persistence"},
+        last_seen_per_decky={"d1": 2000.0},
+    )
+    b = _features(
+        "b",
+        first_phase_per_decky={"d1": "lateral_movement"},
+        first_seen_per_decky={"d1": 1000.0},
+    )
+    assert phase_handoff_weight(a, b) == 0.0
+
+
+# ─── shared_infra_weight ─────────────────────────────────────────────────────
+
+
+def test_shared_infra_full_overlap():
+    a = _features(
+        "a",
+        payload_hashes=frozenset({"hash-1"}),
+        c2_endpoints=frozenset({"1.2.3.4:443"}),
+        decky_set=frozenset({"d1"}),
+    )
+    b = _features(
+        "b",
+        payload_hashes=frozenset({"hash-1"}),
+        c2_endpoints=frozenset({"1.2.3.4:443"}),
+        decky_set=frozenset({"d1"}),
+    )
+    assert shared_infra_weight(a, b) == 1.0
+
+
+def test_shared_infra_no_overlap():
+    a = _features("a", payload_hashes=frozenset({"hash-a"}))
+    b = _features("b", payload_hashes=frozenset({"hash-b"}))
+    assert shared_infra_weight(a, b) == 0.0
+
+
+def test_shared_infra_empty_returns_zero():
+    a = _features("a")
+    b = _features("b")
+    assert shared_infra_weight(a, b) == 0.0
+
+
+# ─── temporal_overlap_weight ─────────────────────────────────────────────────
+
+
+def test_temporal_overlap_full():
+    a = _features("a", session_windows=((0.0, 100.0),))
+    b = _features("b", session_windows=((0.0, 100.0),))
+    assert temporal_overlap_weight(a, b) == 1.0
+
+
+def test_temporal_overlap_partial():
+    a = _features("a", session_windows=((0.0, 100.0),))
+    b = _features("b", session_windows=((50.0, 150.0),))
+    # 50 of 100 of A's time overlaps B.
+    assert temporal_overlap_weight(a, b) == pytest.approx(0.5)
+
+
+def test_temporal_overlap_disjoint():
+    a = _features("a", session_windows=((0.0, 100.0),))
+    b = _features("b", session_windows=((200.0, 300.0),))
+    assert temporal_overlap_weight(a, b) == 0.0
+
+
+def test_temporal_overlap_empty():
+    a = _features("a")
+    b = _features("b", session_windows=((0.0, 100.0),))
+    assert temporal_overlap_weight(a, b) == 0.0
+
+
+# ─── cohort_weight ───────────────────────────────────────────────────────────
+
+
+def test_cohort_asn_overlap():
+    a = _features("a", asn_cohort=frozenset({64512}))
+    b = _features("b", asn_cohort=frozenset({64512}))
+    assert cohort_weight(a, b) == 1.0
+
+
+def test_cohort_disjoint():
+    a = _features("a", asn_cohort=frozenset({64512}))
+    b = _features("b", asn_cohort=frozenset({64513}))
+    assert cohort_weight(a, b) == 0.0
+
+
+# ─── F7 time-agnostic invariant ──────────────────────────────────────────────
+
+
+def test_f7_invariant_temporal_overlap_unchanged_under_shift():
+    # The fixture-7 (slow_burn) invariant: shifting every timestamp on
+    # BOTH sides by the same Δ must yield the same edge weight. The
+    # campaign clusterer's edges are pairwise-relative; an absolute
+    # 90-day shift must not change anything.
+    a = _features("a", session_windows=((0.0, 100.0), (300.0, 400.0)))
+    b = _features("b", session_windows=((50.0, 150.0), (350.0, 450.0)))
+    base = temporal_overlap_weight(a, b)
+    shift = 90 * 24 * 3600.0
+    a_shifted = _features(
+        "a",
+        session_windows=tuple((s + shift, e + shift) for s, e in a.session_windows),
+    )
+    b_shifted = _features(
+        "b",
+        session_windows=tuple((s + shift, e + shift) for s, e in b.session_windows),
+    )
+    assert temporal_overlap_weight(a_shifted, b_shifted) == pytest.approx(base)
+
+
+def test_f7_invariant_phase_handoff_unchanged_under_shift():
+    a = _features(
+        "a",
+        last_phase_per_decky={"d1": "command_and_control"},
+        last_seen_per_decky={"d1": 1000.0},
+    )
+    b = _features(
+        "b",
+        first_phase_per_decky={"d1": "discovery"},
+        first_seen_per_decky={"d1": 1600.0},
+    )
+    base = phase_handoff_weight(a, b)
+
+    shift = 90 * 24 * 3600.0
+    a_shifted = _features(
+        "a",
+        last_phase_per_decky=dict(a.last_phase_per_decky),
+        last_seen_per_decky={k: v + shift for k, v in a.last_seen_per_decky.items()},
+    )
+    b_shifted = _features(
+        "b",
+        first_phase_per_decky=dict(b.first_phase_per_decky),
+        first_seen_per_decky={k: v + shift for k, v in b.first_seen_per_decky.items()},
+    )
+    assert phase_handoff_weight(a_shifted, b_shifted) == base == 1.0
+
+
+# ─── Combined-weight + threshold semantics ──────────────────────────────────
+
+
+def test_phase_handoff_alone_crosses_threshold():
+    """F5 multi_operator's load-bearing signal: handoff alone is enough."""
+    a = _features(
+        "a",
+        last_phase_per_decky={"d1": "persistence"},
+        last_seen_per_decky={"d1": 1000.0},
+    )
+    b = _features(
+        "b",
+        first_phase_per_decky={"d1": "lateral_movement"},
+        first_seen_per_decky={"d1": 1100.0},
+    )
+    assert combined_campaign_weight(a, b) >= CAMPAIGN_EDGE_THRESHOLD
+
+
+def test_cohort_alone_below_threshold():
+    """F2 vpn_hopping at campaign level: cohort alone is not co-op."""
+    a = _features("a", asn_cohort=frozenset({64512}))
+    b = _features("b", asn_cohort=frozenset({64512}))
+    assert combined_campaign_weight(a, b) < CAMPAIGN_EDGE_THRESHOLD
+
+
+def test_shared_infra_alone_crosses_threshold():
+    """Shared payload + C2 alone is enough — F5's intended pass condition."""
+    a = _features(
+        "a",
+        payload_hashes=frozenset({"h"}),
+        c2_endpoints=frozenset({"c"}),
+    )
+    b = _features(
+        "b",
+        payload_hashes=frozenset({"h"}),
+        c2_endpoints=frozenset({"c"}),
+    )
+    assert combined_campaign_weight(a, b) >= CAMPAIGN_EDGE_THRESHOLD
+
+
+def test_decky_overlap_alone_below_threshold():
+    """F1's failure mode: shared targeting on a small fleet is NOT co-op.
+
+    Two campaigns hitting the same SSH deckies share no payload/C2,
+    just the decky set. Cohort tier alone must not cross threshold.
+    """
+    a = _features(
+        "a",
+        decky_set=frozenset({"d1", "d2"}),
+        asn_cohort=frozenset({64512}),
+    )
+    b = _features(
+        "b",
+        decky_set=frozenset({"d1", "d2"}),
+        asn_cohort=frozenset({64513}),
+    )
+    assert combined_campaign_weight(a, b) < CAMPAIGN_EDGE_THRESHOLD
+
+
+def test_combined_invariant_under_shift():
+    """End-to-end F7 invariant on the combined weight."""
+    a = _features(
+        "a",
+        last_phase_per_decky={"d1": "persistence"},
+        last_seen_per_decky={"d1": 1000.0},
+        session_windows=((0.0, 1500.0),),
+        payload_hashes=frozenset({"h"}),
+    )
+    b = _features(
+        "b",
+        first_phase_per_decky={"d1": "discovery"},
+        first_seen_per_decky={"d1": 1100.0},
+        session_windows=((1100.0, 2000.0),),
+        payload_hashes=frozenset({"h"}),
+    )
+    base = combined_campaign_weight(a, b)
+    shift = 90 * 24 * 3600.0
+    a_shifted = IdentityFeatures(
+        identity_uuid=a.identity_uuid,
+        last_phase_per_decky=dict(a.last_phase_per_decky),
+        last_seen_per_decky={k: v + shift for k, v in a.last_seen_per_decky.items()},
+        session_windows=tuple((s + shift, e + shift) for s, e in a.session_windows),
+        payload_hashes=a.payload_hashes,
+    )
+    b_shifted = IdentityFeatures(
+        identity_uuid=b.identity_uuid,
+        first_phase_per_decky=dict(b.first_phase_per_decky),
+        first_seen_per_decky={k: v + shift for k, v in b.first_seen_per_decky.items()},
+        session_windows=tuple((s + shift, e + shift) for s, e in b.session_windows),
+        payload_hashes=b.payload_hashes,
+    )
+    assert combined_campaign_weight(a_shifted, b_shifted) == pytest.approx(base)
--- a/tests/clustering/test_campaign_worker.py
+++ b/tests/clustering/test_campaign_worker.py
@@ -0,0 +1,357 @@
+"""End-to-end tests for the campaign-clusterer worker shell + tick.
+
+Mirrors :mod:`tests.clustering.test_clusterer_worker` for the layer
+above. Covers shell lifecycle (shutdown / cancel / raising tick),
+end-to-end ``tick`` against SQLite (form, link, merge, revoke), bus
+fan-out to the four ``campaign.*`` topics + cross-family
+``identity.campaign.assigned``, factory dispatch, and CLI gating.
+"""
+from __future__ import annotations
+
+import asyncio
+import json
+from datetime import datetime, timezone
+
+import pytest
+
+from decnet.bus import topics as _topics
+from decnet.clustering.campaign.base import (
+    CampaignClusterer,
+    CampaignClusterResult,
+)
+from decnet.clustering.campaign.factory import get_campaign_clusterer
+from decnet.clustering.campaign.impl.connected_components import (
+    ConnectedComponentsCampaignClusterer,
+    cluster_identities,
+    from_identity_row,
+)
+from decnet.clustering.campaign.impl.similarity import IdentityFeatures
+from decnet.clustering.campaign.worker import run_campaign_clusterer_loop
+from decnet.web.db.factory import get_repository
+
+
+@pytest.fixture
+async def repo(tmp_path):
+    r = get_repository(db_path=str(tmp_path / "campaign.db"))
+    await r.initialize()
+    return r
+
+
+@pytest.fixture(autouse=True)
+def _no_bus(monkeypatch):
+    """Run workers in poll-only mode — no real Unix socket.
+
+    Autouse: sets ``DECNET_BUS_ENABLED=false`` for every test in this
+    module; ``monkeypatch`` undoes the change when the test finishes.
+    """
+    monkeypatch.setenv("DECNET_BUS_ENABLED", "false")
+
+
+# ─── Test doubles ───────────────────────────────────────────────────────────
+
+
+class _FakeClusterer(CampaignClusterer):
+    name = "fake"
+
+    def __init__(self, results=None) -> None:
+        self._results = list(results or [])
+        self.calls = 0
+
+    async def tick(self, repo) -> CampaignClusterResult:
+        self.calls += 1
+        if self._results:
+            return self._results.pop(0)
+        return CampaignClusterResult()
+
+
+class _RaisingClusterer(CampaignClusterer):
+    """Test double whose ``tick`` always raises, counting each attempt."""
+
+    name = "raising"
+
+    def __init__(self) -> None:
+        # Number of times ``tick`` has been invoked.
+        self.calls = 0
+
+    async def tick(self, repo) -> CampaignClusterResult:
+        # Count before raising so tests can see how often the loop retried.
+        self.calls += 1
+        raise RuntimeError("boom")
+
+
+# ─── Shell lifecycle ────────────────────────────────────────────────────────
+
+
+@pytest.mark.anyio
+async def test_loop_exits_on_shutdown(repo):
+    shutdown = asyncio.Event()
+    clusterer = _FakeClusterer()
+    task = asyncio.create_task(
+        run_campaign_clusterer_loop(
+            repo, poll_interval_secs=0.05,
+            clusterer=clusterer, shutdown=shutdown,
+        )
+    )
+    await asyncio.sleep(0.12)
+    shutdown.set()
+    await asyncio.wait_for(task, timeout=2.0)
+    assert clusterer.calls >= 1
+
+
+@pytest.mark.anyio
+async def test_loop_exits_on_cancel(repo):
+    clusterer = _FakeClusterer()
+    task = asyncio.create_task(
+        run_campaign_clusterer_loop(
+            repo, poll_interval_secs=0.05, clusterer=clusterer,
+        )
+    )
+    await asyncio.sleep(0.1)
+    task.cancel()
+    await asyncio.wait_for(task, timeout=2.0)
+    assert clusterer.calls >= 1
+
+
+@pytest.mark.anyio
+async def test_tick_failure_does_not_crash_loop(repo):
+    shutdown = asyncio.Event()
+    clusterer = _RaisingClusterer()
+    task = asyncio.create_task(
+        run_campaign_clusterer_loop(
+            repo, poll_interval_secs=0.05,
+            clusterer=clusterer, shutdown=shutdown,
+        )
+    )
+    await asyncio.sleep(0.2)
+    shutdown.set()
+    await asyncio.wait_for(task, timeout=2.0)
+    assert clusterer.calls >= 2
+
+
+# ─── Bus fan-out ────────────────────────────────────────────────────────────
+
+
+@pytest.mark.anyio
+async def test_publishes_campaign_result_on_bus(monkeypatch, repo):
+    published: list[tuple[str, dict, str]] = []
+
+    async def _fake_publish(bus, topic, payload, event_type=""):
+        published.append((topic, payload, event_type))
+
+    monkeypatch.setattr(
+        "decnet.clustering.campaign.worker.publish_safely", _fake_publish,
+    )
+
+    result = CampaignClusterResult(
+        campaigns_formed=[
+            {"campaign_uuid": "c-1", "identity_uuids": ["i-1", "i-2"]},
+        ],
+        identities_assigned=[
+            {"campaign_uuid": "c-1", "identity_uuid": "i-3",
+             "prior_campaign_uuid": None},
+        ],
+        campaigns_merged=[
+            {"winner_uuid": "c-1", "loser_uuid": "c-2"},
+        ],
+        campaigns_unmerged=[
+            {"resurrected_uuid": "c-2", "former_winner_uuid": "c-1"},
+        ],
+    )
+    clusterer = _FakeClusterer(results=[result])
+
+    shutdown = asyncio.Event()
+    task = asyncio.create_task(
+        run_campaign_clusterer_loop(
+            repo, poll_interval_secs=0.05,
+            clusterer=clusterer, shutdown=shutdown,
+        )
+    )
+    await asyncio.sleep(0.1)
+    shutdown.set()
+    await asyncio.wait_for(task, timeout=2.0)
+
+    topics_seen = {t for t, _, _ in published}
+    assert _topics.campaign(_topics.CAMPAIGN_FORMED) in topics_seen
+    assert _topics.campaign(_topics.CAMPAIGN_IDENTITY_ASSIGNED) in topics_seen
+    assert _topics.campaign(_topics.CAMPAIGN_MERGED) in topics_seen
+    assert _topics.campaign(_topics.CAMPAIGN_UNMERGED) in topics_seen
+    # Cross-family signal — every campaigns_formed identity AND every
+    # identities_assigned identity should fire identity.campaign.assigned.
+    cross = _topics.identity(_topics.IDENTITY_CAMPAIGN_ASSIGNED)
+    cross_payloads = [p for t, p, _ in published if t == cross]
+    cross_idents = {p["identity_uuid"] for p in cross_payloads}
+    assert {"i-1", "i-2", "i-3"}.issubset(cross_idents)
+
+
+# ─── Pure clusterer + projection ────────────────────────────────────────────
+
+
+def test_cluster_identities_singletons():
+    a = IdentityFeatures(identity_uuid="a")
+    b = IdentityFeatures(identity_uuid="b")
+    labels = cluster_identities([a, b])
+    assert labels["a"] != labels["b"]
+
+
+def test_cluster_identities_phase_handoff_unions():
+    a = IdentityFeatures(
+        identity_uuid="a",
+        last_phase_per_decky={"d1": "command_and_control"},
+        last_seen_per_decky={"d1": 1000.0},
+    )
+    b = IdentityFeatures(
+        identity_uuid="b",
+        first_phase_per_decky={"d1": "discovery"},
+        first_seen_per_decky={"d1": 1100.0},
+    )
+    labels = cluster_identities([a, b])
+    assert labels["a"] == labels["b"]
+
+
+def test_from_identity_row_parses_json_lists():
+    feat = from_identity_row({
+        "uuid": "i-1",
+        "payload_simhashes": json.dumps(["h1", "h2"]),
+        "c2_endpoints": json.dumps(["c1"]),
+    })
+    assert feat.identity_uuid == "i-1"
+    assert feat.payload_hashes == frozenset({"h1", "h2"})
+    assert feat.c2_endpoints == frozenset({"c1"})
+
+
+def test_from_identity_row_handles_null_and_garbage():
+    f = from_identity_row({
+        "uuid": "i-1",
+        "payload_simhashes": None,
+        "c2_endpoints": "not-json",
+    })
+    assert f.payload_hashes == frozenset()
+    assert f.c2_endpoints == frozenset()
+
+
+# ─── End-to-end tick against SQLite ────────────────────────────────────────
+
+
+async def _create_identity(repo, uuid: str, **kwargs) -> str:
+    now = datetime.now(timezone.utc)
+    return await repo.create_attacker_identity({
+        "uuid": uuid,
+        "first_seen_at": now,
+        "last_seen_at": now,
+        "payload_simhashes": kwargs.get("payload_simhashes"),
+        "c2_endpoints": kwargs.get("c2_endpoints"),
+    })
+
+
+@pytest.mark.anyio
+async def test_tick_empty_db_returns_empty_result(repo):
+    c = ConnectedComponentsCampaignClusterer()
+    result = await c.tick(repo)
+    assert result.campaigns_formed == []
+    assert result.identities_assigned == []
+    assert result.campaigns_merged == []
+    assert result.campaigns_unmerged == []
+
+
+@pytest.mark.anyio
+async def test_tick_forms_campaign_for_shared_infra_co_op(repo):
+    """Two identities with shared payload + C2 fold to one campaign.
+
+    The canonical F5-style co-op pattern, exercised end-to-end through
+    the production-row adapter. ``from_identity_row`` reads
+    ``payload_simhashes`` + ``c2_endpoints`` from the AttackerIdentity
+    JSON columns, builds IdentityFeatures, and the campaign weight
+    crosses threshold on shared_infra alone.
+    """
+    await _create_identity(
+        repo, "i1",
+        payload_simhashes=json.dumps(["h1"]),
+        c2_endpoints=json.dumps(["c1"]),
+    )
+    await _create_identity(
+        repo, "i2",
+        payload_simhashes=json.dumps(["h1"]),
+        c2_endpoints=json.dumps(["c1"]),
+    )
+
+    c = ConnectedComponentsCampaignClusterer()
+    result = await c.tick(repo)
+
+    assert len(result.campaigns_formed) == 1
+    formed_idents = set(result.campaigns_formed[0]["identity_uuids"])
+    assert formed_idents == {"i1", "i2"}
+
+
+@pytest.mark.anyio
+async def test_tick_keeps_distinct_payloads_separate(repo):
+    """No payload/C2 overlap → singleton per identity."""
+    await _create_identity(
+        repo, "i1",
+        payload_simhashes=json.dumps(["h1"]),
+        c2_endpoints=json.dumps(["c1"]),
+    )
+    await _create_identity(
+        repo, "i2",
+        payload_simhashes=json.dumps(["h2"]),
+        c2_endpoints=json.dumps(["c2"]),
+    )
+
+    c = ConnectedComponentsCampaignClusterer()
+    result = await c.tick(repo)
+
+    assert len(result.campaigns_formed) == 2
+
+
+@pytest.mark.anyio
+async def test_tick_idempotent_links_existing_identity(repo):
+    """Second tick on same input doesn't double-create campaigns."""
+    await _create_identity(repo, "i1")
+    c = ConnectedComponentsCampaignClusterer()
+
+    r1 = await c.tick(repo)
+    assert len(r1.campaigns_formed) == 1
+    campaign_uuid = r1.campaigns_formed[0]["campaign_uuid"]
+
+    r2 = await c.tick(repo)
+    # Identity already linked — no new campaign, no new assignment.
+    assert r2.campaigns_formed == []
+    assert r2.identities_assigned == []
+    # And the existing assignment persisted.
+    assert await repo.count_identities_for_campaign(campaign_uuid) == 1
+
+
+@pytest.mark.anyio
+async def test_tick_skips_merged_out_identities(repo):
+    """Merged-out identity rows must not show up as cluster inputs."""
+    await _create_identity(repo, "i1")
+    await _create_identity(repo, "i2")
+    # Soft-merge i2 into i1 at the identity layer.
+    await repo.update_identity_merged_into("i2", "i1")
+
+    c = ConnectedComponentsCampaignClusterer()
+    result = await c.tick(repo)
+
+    # Only i1 is an active row; one campaign formed, with one identity.
+    assert len(result.campaigns_formed) == 1
+    assert result.campaigns_formed[0]["identity_uuids"] == ["i1"]
+
+
+# ─── Factory + CLI gating ────────────────────────────────────────────────────
+
+
+def test_factory_default():
+    c = get_campaign_clusterer()
+    assert isinstance(c, ConnectedComponentsCampaignClusterer)
+
+
+def test_factory_unknown_raises(monkeypatch):
+    """An unrecognised ``DECNET_CAMPAIGN_CLUSTERER_TYPE`` makes the factory raise ValueError."""
+    monkeypatch.setenv("DECNET_CAMPAIGN_CLUSTERER_TYPE", "nope")
+    with pytest.raises(ValueError):
+        get_campaign_clusterer()
+
+
+def test_campaign_clusterer_registered_in_cli():
+    """``campaign-clusterer`` is listed in ``MASTER_ONLY_COMMANDS``."""
+    # Imported inside the test, so the CLI gating module loads only when it runs.
+    from decnet.cli.gating import MASTER_ONLY_COMMANDS
+    assert "campaign-clusterer" in MASTER_ONLY_COMMANDS
+
+
+def test_campaign_topic_builder_round_trips():
+    assert _topics.campaign(_topics.CAMPAIGN_FORMED) == "campaign.formed"
+    assert _topics.campaign(_topics.CAMPAIGN_IDENTITY_ASSIGNED) == (
+        "campaign.identity.assigned"
+    )
+    assert _topics.identity(_topics.IDENTITY_CAMPAIGN_ASSIGNED) == (
+        "identity.campaign.assigned"
+    )
--- a/tests/clustering/test_clusterer_factory.py
+++ b/tests/clustering/test_clusterer_factory.py
@@ -0,0 +1,34 @@
+"""Tests for :mod:`decnet.clustering.factory`."""
+from __future__ import annotations
+
+import pytest
+
+from decnet.clustering.base import Clusterer
+from decnet.clustering.factory import get_clusterer
+from decnet.clustering.impl.connected_components import ConnectedComponentsClusterer
+
+
+def test_default_returns_connected_components(monkeypatch):
+    monkeypatch.delenv("DECNET_CLUSTERER_TYPE", raising=False)
+    c = get_clusterer()
+    assert isinstance(c, ConnectedComponentsClusterer)
+    assert isinstance(c, Clusterer)
+    assert c.name == "connected_components"
+
+
+def test_explicit_connected_components(monkeypatch):
+    monkeypatch.setenv("DECNET_CLUSTERER_TYPE", "connected_components")
+    c = get_clusterer()
+    assert isinstance(c, ConnectedComponentsClusterer)
+
+
+def test_unknown_clusterer_type_raises(monkeypatch):
+    monkeypatch.setenv("DECNET_CLUSTERER_TYPE", "nope")
+    with pytest.raises(ValueError, match="Unknown clusterer"):
+        get_clusterer()
+
+
+def test_case_insensitive(monkeypatch):
+    monkeypatch.setenv("DECNET_CLUSTERER_TYPE", "  CONNECTED_COMPONENTS  ")
+    c = get_clusterer()
+    assert isinstance(c, ConnectedComponentsClusterer)
--- a/tests/clustering/test_clusterer_worker.py
+++ b/tests/clustering/test_clusterer_worker.py
@@ -0,0 +1,182 @@
+"""End-to-end tests for the clusterer worker shell.
+
+The skeleton clusterer is a no-op; these tests cover the shell:
+
+* exits cleanly on shutdown signal (and via cancel)
+* invokes ``tick`` on each loop iteration
+* publishes :class:`ClusterResult` side-effects on the right topics
+* a clusterer raising from ``tick`` is logged and does not crash the loop
+"""
+from __future__ import annotations
+
+import asyncio
+
+import pytest
+
+from decnet.bus import topics as _topics
+from decnet.clustering.base import Clusterer, ClusterResult
+from decnet.clustering.impl.connected_components import ConnectedComponentsClusterer
+from decnet.clustering.worker import run_clusterer_loop
+from decnet.web.db.factory import get_repository
+
+
+@pytest.fixture
+async def repo(tmp_path):
+    r = get_repository(db_path=str(tmp_path / "clusterer.db"))
+    await r.initialize()
+    return r
+
+
+@pytest.fixture(autouse=True)
+def _no_bus(monkeypatch):
+    """Run workers in poll-only mode — no real Unix socket.
+
+    Autouse: sets ``DECNET_BUS_ENABLED=false`` for every test in this
+    module; ``monkeypatch`` undoes the change when the test finishes.
+    """
+    monkeypatch.setenv("DECNET_BUS_ENABLED", "false")
+
+
+class _FakeClusterer(Clusterer):
+    """Test double: returns canned :class:`ClusterResult` per call."""
+
+    name = "fake"
+
+    def __init__(self, results: list[ClusterResult] | None = None) -> None:
+        self._results = list(results or [])
+        self.calls = 0
+
+    async def tick(self, repo) -> ClusterResult:
+        self.calls += 1
+        if self._results:
+            return self._results.pop(0)
+        return ClusterResult()
+
+
+class _RaisingClusterer(Clusterer):
+    """Test double whose ``tick`` always raises, counting each attempt."""
+
+    name = "raising"
+
+    def __init__(self) -> None:
+        # Number of times ``tick`` has been invoked.
+        self.calls = 0
+
+    async def tick(self, repo) -> ClusterResult:
+        # Count before raising so tests can see how often the loop retried.
+        self.calls += 1
+        raise RuntimeError("boom")
+
+
+@pytest.mark.anyio
+async def test_loop_exits_on_shutdown_signal(repo):
+    shutdown = asyncio.Event()
+    clusterer = _FakeClusterer()
+    task = asyncio.create_task(
+        run_clusterer_loop(
+            repo,
+            poll_interval_secs=0.05,
+            clusterer=clusterer,
+            shutdown=shutdown,
+        )
+    )
+    await asyncio.sleep(0.12)
+    shutdown.set()
+    await asyncio.wait_for(task, timeout=2.0)
+    assert clusterer.calls >= 1
+
+
+@pytest.mark.anyio
+async def test_loop_exits_on_cancel(repo):
+    clusterer = _FakeClusterer()
+    task = asyncio.create_task(
+        run_clusterer_loop(
+            repo,
+            poll_interval_secs=0.05,
+            clusterer=clusterer,
+        )
+    )
+    await asyncio.sleep(0.1)
+    task.cancel()
+    # The loop catches CancelledError and exits cleanly, mirroring the
+    # intel + reuse worker shells.
+    await asyncio.wait_for(task, timeout=2.0)
+    assert clusterer.calls >= 1
+
+
+@pytest.mark.anyio
+async def test_tick_failure_does_not_crash_loop(repo):
+    """A clusterer raising from tick must be logged, not propagated."""
+    shutdown = asyncio.Event()
+    clusterer = _RaisingClusterer()
+    task = asyncio.create_task(
+        run_clusterer_loop(
+            repo,
+            poll_interval_secs=0.05,
+            clusterer=clusterer,
+            shutdown=shutdown,
+        )
+    )
+    await asyncio.sleep(0.2)
+    shutdown.set()
+    await asyncio.wait_for(task, timeout=2.0)
+    # Loop kept ticking despite the raise.
+    assert clusterer.calls >= 2
+
+
+@pytest.mark.anyio
+async def test_skeleton_clusterer_returns_empty_result(repo):
+    """The connected-components skeleton produces no side-effects yet."""
+    c = ConnectedComponentsClusterer()
+    result = await c.tick(repo)
+    assert result.identities_formed == []
+    assert result.observations_linked == []
+    assert result.identities_merged == []
+    assert result.identities_unmerged == []
+
+
+@pytest.mark.anyio
+async def test_publishes_cluster_result_on_bus(monkeypatch, repo):
+    """Every entry in ClusterResult fans out to the correct topic."""
+    published: list[tuple[str, dict, str]] = []
+
+    async def _fake_publish(bus, topic, payload, event_type=""):
+        published.append((topic, payload, event_type))
+
+    monkeypatch.setattr(
+        "decnet.clustering.worker.publish_safely", _fake_publish,
+    )
+
+    result = ClusterResult(
+        identities_formed=[
+            {"identity_uuid": "id-1", "observation_uuids": ["obs-1", "obs-2"]},
+        ],
+        observations_linked=[
+            {"identity_uuid": "id-1", "observation_uuid": "obs-3"},
+        ],
+        identities_merged=[
+            {"winner_uuid": "id-1", "loser_uuid": "id-2"},
+        ],
+        identities_unmerged=[
+            {"resurrected_uuid": "id-2", "former_winner_uuid": "id-1"},
+        ],
+    )
+    clusterer = _FakeClusterer(results=[result])
+
+    shutdown = asyncio.Event()
+    task = asyncio.create_task(
+        run_clusterer_loop(
+            repo,
+            poll_interval_secs=0.05,
+            clusterer=clusterer,
+            shutdown=shutdown,
+        )
+    )
+    await asyncio.sleep(0.1)
+    shutdown.set()
+    await asyncio.wait_for(task, timeout=2.0)
+
+    topics_seen = {t for t, _, _ in published}
+    assert _topics.identity(_topics.IDENTITY_FORMED) in topics_seen
+    assert _topics.identity(_topics.IDENTITY_OBSERVATION_LINKED) in topics_seen
+    assert _topics.identity(_topics.IDENTITY_MERGED) in topics_seen
+    assert _topics.identity(_topics.IDENTITY_UNMERGED) in topics_seen
+
+
+@pytest.mark.anyio
+async def test_clusterer_registered_in_cli():
+    """`decnet clusterer` is registered as a master-only command."""
+    from decnet.cli.gating import MASTER_ONLY_COMMANDS
+    assert "clusterer" in MASTER_ONLY_COMMANDS
--- a/tests/clustering/test_connected_components.py
+++ b/tests/clustering/test_connected_components.py
@@ -0,0 +1,808 @@
+"""Tests for the connected-components clusterer (commit 4 — high-weight edges).
+
+Covers, in order:
+
+* The pure ``cluster_observations`` algorithm — singletons stay
+  isolated, exact-match high-weight signals fold them together,
+  un-fingerprinted observations stay un-mergeable.
+* The production-row adapter ``from_attacker_row`` — JA3 / HASSH
+  recovered from the fingerprints JSON; absent fields project to
+  ``None``.
+* End-to-end ``tick`` against a real SQLite repo: seeded attackers
+  with shared / divergent fingerprints get the right identity rows
+  written and the right ``identity_id`` links set.
+* Fixture-bound assertions: lone_wolf (pure singletons),
+  shared_wordlist (no fingerprint signal — singletons),
+  vpn_hopping at identity-level (one identity from 5 rotated IPs
+  via shared JA3 + HASSH), plus paused_campaign, multi_operator,
+  noise_floor and slow_burn.
+
+The tick is bus-free here — the worker shell tests cover bus fan-out
+separately. We're validating the algorithm + DB writes here.
+"""
+from __future__ import annotations
+
+import json
+from datetime import datetime, timezone
+from pathlib import Path
+
+import pytest
+
+from decnet.clustering.impl.connected_components import (
+    ConnectedComponentsClusterer,
+    cluster_observations,
+    from_attacker_row,
+)
+from decnet.clustering.impl.similarity import Observation, from_synthetic
+from decnet.web.db.factory import get_repository
+
+FIXTURE_DIR = Path(__file__).parent.parent / "fixtures" / "campaigns"
+
+
+# ─── pure algorithm ─────────────────────────────────────────────────────────
+
+
+def _obs(obs_id: str, **kwargs) -> Observation:
+    return Observation(observation_id=obs_id, **kwargs)
+
+
+def test_cluster_observations_singletons_stay_isolated():
+    a = _obs("a", ja3="ja3-a")
+    b = _obs("b", ja3="ja3-b")
+    c = _obs("c")  # no fingerprint
+    labels = cluster_observations([a, b, c])
+    assert labels["a"] != labels["b"]
+    assert labels["b"] != labels["c"]
+    assert labels["a"] != labels["c"]
+
+
+def test_cluster_observations_ja3_match_unions():
+    a = _obs("a", ja3="ja3-shared")
+    b = _obs("b", ja3="ja3-shared")
+    c = _obs("c", ja3="ja3-other")
+    labels = cluster_observations([a, b, c])
+    assert labels["a"] == labels["b"]
+    assert labels["a"] != labels["c"]
+
+
+def test_cluster_observations_unfingerprinted_stay_separate():
+    """Two observations with no signals must NOT collapse into one
+    cluster — that would fuse every noise scanner together."""
+    a = _obs("a")
+    b = _obs("b")
+    labels = cluster_observations([a, b])
+    assert labels["a"] != labels["b"]
+
+
+def test_cluster_observations_transitive_via_payload():
+    """A↔B via JA3, B↔C via payload → A, B, C all in one component."""
+    a = _obs("a", ja3="ja3-x")
+    b = _obs("b", ja3="ja3-x", payload_hashes=frozenset({"pl-1"}))
+    c = _obs("c", payload_hashes=frozenset({"pl-1"}))
+    labels = cluster_observations([a, b, c])
+    assert labels["a"] == labels["b"] == labels["c"]
+
+
+def test_cluster_observations_empty_input():
+    assert cluster_observations([]) == {}
+
+
+def test_cluster_observations_deterministic():
+    """Same input → same labels. Load-bearing for fixture stability."""
+    obs = [_obs("a", ja3="x"), _obs("b", ja3="x"), _obs("c")]
+    assert cluster_observations(obs) == cluster_observations(obs)
+
+
+# ─── production-row adapter ────────────────────────────────────────────────
+
+
+def test_from_attacker_row_extracts_ja3_and_hassh():
+    row = {
+        "uuid": "att-1",
+        "asn": 64500,
+        "identity_id": None,
+        "fingerprints": json.dumps([
+            {"kind": "ja3", "hash": "ja3-abc"},
+            {"kind": "hassh", "hash": "hassh-def"},
+            {"kind": "jarm", "hash": "jarm-ghi"},  # not used in v1
+        ]),
+    }
+    obs = from_attacker_row(row)
+    assert obs.observation_id == "att-1"
+    assert obs.ja3 == "ja3-abc"
+    assert obs.hassh == "hassh-def"
+    assert obs.asn == 64500
+
+
+def test_from_attacker_row_handles_empty_fingerprints():
+    row = {"uuid": "att-2", "asn": None, "identity_id": None, "fingerprints": "[]"}
+    obs = from_attacker_row(row)
+    assert obs.ja3 is None
+    assert obs.hassh is None
+    assert obs.asn is None
+
+
+def test_from_attacker_row_handles_malformed_json():
+    row = {"uuid": "att-3", "asn": None, "identity_id": None, "fingerprints": "not json"}
+    obs = from_attacker_row(row)
+    assert obs.ja3 is None
+    assert obs.hassh is None
+
+
+# ─── end-to-end tick against SQLite ────────────────────────────────────────
+
+
+@pytest.fixture
+async def repo(tmp_path):
+    r = get_repository(db_path=str(tmp_path / "clusterer.db"))
+    await r.initialize()
+    return r
+
+
+async def _seed_attacker(
+    repo, ip: str, *,
+    ja3: str | None = None,
+    hassh: str | None = None,
+    asn: int | None = None,
+    cert_sha256: str | None = None,
+) -> str:
+    now = datetime.now(timezone.utc)
+    # Two-shape fingerprint payload:
+    #   - the "kind" entries feed the clusterer's from_attacker_row
+    #     (test-fixture shape, line ~115 of connected_components.py)
+    #   - the "bounty_type/payload" entries feed identity_rollup's
+    #     extract_fp_summaries (production shape, written by the
+    #     profiler from real bounty rows). Both shapes coexist in
+    #     the same JSON list so the same seed exercises clustering
+    #     AND the identity-column rollup.
+    fingerprints: list[dict] = []
+    if ja3:
+        fingerprints.append({"kind": "ja3", "hash": ja3})
+        fingerprints.append({
+            "bounty_type": "fingerprint",
+            "payload": {"fingerprint_type": "ja3", "ja3": ja3},
+        })
+    if hassh:
+        fingerprints.append({"kind": "hassh", "hash": hassh})
+        fingerprints.append({
+            "bounty_type": "fingerprint",
+            "payload": {"fingerprint_type": "hassh_server", "hash": hassh},
+        })
+    if cert_sha256:
+        fingerprints.append({
+            "bounty_type": "fingerprint",
+            "payload": {
+                "fingerprint_type": "tls_certificate",
+                "cert_sha256": cert_sha256,
+            },
+        })
+    return await repo.upsert_attacker({
+        "ip": ip,
+        "first_seen": now,
+        "last_seen": now,
+        "event_count": 1,
+        "asn": asn,
+        "fingerprints": json.dumps(fingerprints),
+    })
+
+
+@pytest.mark.anyio
+async def test_tick_on_empty_db_is_noop(repo):
+    c = ConnectedComponentsClusterer()
+    result = await c.tick(repo)
+    assert result.identities_formed == []
+    assert result.observations_linked == []
+
+
+@pytest.mark.anyio
+async def test_tick_clusters_shared_ja3(repo):
+    """Two observations with the same JA3 → one identity row, both linked."""
+    a = await _seed_attacker(repo, "1.1.1.1", ja3="ja3-x", asn=64500)
+    b = await _seed_attacker(repo, "2.2.2.2", ja3="ja3-x", asn=64501)
+
+    c = ConnectedComponentsClusterer()
+    result = await c.tick(repo)
+
+    assert len(result.identities_formed) == 1
+    formed = result.identities_formed[0]
+    assert set(formed["observation_uuids"]) == {a, b}
+
+    # Identity row exists and both attackers FK to it.
+    identity_uuid = formed["identity_uuid"]
+    identity = await repo.get_identity_by_uuid(identity_uuid)
+    assert identity is not None
+    assert identity["uuid"] == identity_uuid
+
+    obs_for_id = await repo.list_observations_for_identity(identity_uuid)
+    obs_uuids = {o["uuid"] for o in obs_for_id}
+    assert obs_uuids == {a, b}
+
+
+@pytest.mark.anyio
+async def test_tick_keeps_distinct_ja3_separate(repo):
+    """Two divergent JA3s with no other shared signal → two singletons,
+    no identity rows written (singletons stay un-clustered in v1)."""
+    await _seed_attacker(repo, "1.1.1.1", ja3="ja3-a")
+    await _seed_attacker(repo, "2.2.2.2", ja3="ja3-b")
+
+    c = ConnectedComponentsClusterer()
+    result = await c.tick(repo)
+
+    # Singletons get identity rows of their own (one observation per cluster).
+    assert len(result.identities_formed) == 2
+    for formed in result.identities_formed:
+        assert len(formed["observation_uuids"]) == 1
+
+
+@pytest.mark.anyio
+async def test_tick_merges_two_identities_when_component_spans_them(repo):
+    """Two pre-existing identities whose observations now cluster
+    together (e.g. a previously-missing fingerprint shows up) get
+    soft-merged: the smaller-uuid identity wins, the loser's
+    merged_into_uuid is set, observations stay FK'd to their
+    original identity row."""
+    # Tick 1: two distinct fingerprints → two distinct identities.
+    a = await _seed_attacker(repo, "1.1.1.1", ja3="ja3-A")
+    b = await _seed_attacker(repo, "2.2.2.2", ja3="ja3-B")
+
+    c = ConnectedComponentsClusterer()
+    first = await c.tick(repo)
+    assert len(first.identities_formed) == 2
+
+    # Snapshot the two identity uuids; we'll need them after the merge.
+    identities_after_first = await repo.list_all_identities()
+    assert len(identities_after_first) == 2
+    uuids = sorted(i["uuid"] for i in identities_after_first)
+    expected_winner, expected_loser = uuids[0], uuids[1]
+
+    # Tick 2: a bridging observation — fingerprints match BOTH prior
+    # rows. The bridge can't agree with both JA3s simultaneously, so
+    # use a HASSH that matches A and a payload that matches B.
+    # Simulate this with two new attackers, each linking a side.
+    # Simpler: change attacker A's stored fingerprint to also include
+    # ja3-B by re-seeding (in production this would be a fresh
+    # observation that bridges them).
+    bridge = await _seed_attacker(repo, "3.3.3.3", ja3="ja3-A", hassh="hassh-bridge")
+    # Make B's row carry the same hassh so the bridge can union them.
+    import json as _json
+    from datetime import datetime, timezone
+    now = datetime.now(timezone.utc)
+    await repo.upsert_attacker({
+        "ip": "2.2.2.2", "first_seen": now, "last_seen": now,
+        "event_count": 1,
+        "fingerprints": _json.dumps([
+            {"kind": "ja3", "hash": "ja3-B"},
+            {"kind": "hassh", "hash": "hassh-bridge"},
+        ]),
+    })
+
+    second = await c.tick(repo)
+    assert len(second.identities_merged) == 1
+    merge = second.identities_merged[0]
+    assert merge["winner_uuid"] == expected_winner
+    assert merge["loser_uuid"] == expected_loser
+
+    # The loser's row still exists with merged_into_uuid set.
+    all_after = {i["uuid"]: i for i in await repo.list_all_identities()}
+    assert all_after[expected_loser]["merged_into_uuid"] == expected_winner
+    assert all_after[expected_winner]["merged_into_uuid"] is None
+
+    # Observations stay FK'd to their original identity row — the
+    # merge is a soft pointer, NOT a re-point.
+    a_row = await repo.get_attacker_by_uuid(a)
+    b_row = await repo.get_attacker_by_uuid(b)
+    assert a_row["identity_id"] in {expected_winner, expected_loser}
+    assert b_row["identity_id"] in {expected_winner, expected_loser}
+
+
+@pytest.mark.anyio
+async def test_tick_unmerges_when_observations_diverge(repo):
+    """Pre-seed a soft-merged pair, then change the underlying
+    observations so they no longer cluster. The tick must clear
+    merged_into_uuid and emit identities_unmerged."""
+    import json as _json
+    from datetime import datetime, timezone
+    now = datetime.now(timezone.utc)
+
+    # Two attackers with same JA3 → tick merges them via shared
+    # high-tier signal (one identity formed).
+    a = await _seed_attacker(repo, "1.1.1.1", ja3="ja3-shared")
+    b = await _seed_attacker(repo, "2.2.2.2", ja3="ja3-shared")
+    c = ConnectedComponentsClusterer()
+    first = await c.tick(repo)
+    assert len(first.identities_formed) == 1
+    one_identity_uuid = first.identities_formed[0]["identity_uuid"]
+
+    # Force a soft-merge state: split observation b out into its own
+    # identity, then merge that back into the first via the repo
+    # directly. This emulates a state the clusterer would have
+    # arrived at across multiple ticks (form, then merge).
+    second_uuid = "00000000-0000-0000-0000-00000000bbbb"
+    await repo.create_attacker_identity({
+        "uuid": second_uuid,
+        "schema_version": 1,
+        "first_seen_at": now, "last_seen_at": now,
+        "created_at": now, "updated_at": now,
+        "observation_count": 1,
+    })
+    await repo.set_attacker_identity_id(b, second_uuid)
+    # Soft-merge second_uuid into one_identity_uuid (winner).
+    winner = min(one_identity_uuid, second_uuid)
+    loser = max(one_identity_uuid, second_uuid)
+    if loser == one_identity_uuid:
+        # Make the canonical mapping consistent with the test setup —
+        # we need the merge to be "loser → winner" by min-uuid rule.
+        # Swap ownership so the smaller-uuid keeps the active observations.
+        await repo.set_attacker_identity_id(a, winner)
+        await repo.set_attacker_identity_id(b, loser)
+    await repo.update_identity_merged_into(loser, winner)
+
+    # Verify the soft-merge is in place.
+    pre = {i["uuid"]: i for i in await repo.list_all_identities()}
+    assert pre[loser]["merged_into_uuid"] == winner
+
+    # Now change the underlying fingerprints so a and b no longer cluster.
+    await repo.upsert_attacker({
+        "ip": "2.2.2.2", "first_seen": now, "last_seen": now,
+        "event_count": 1,
+        "fingerprints": _json.dumps([{"kind": "ja3", "hash": "ja3-different"}]),
+    })
+
+    # Tick should detect the divergence and revoke the merge.
+    third = await c.tick(repo)
+    assert len(third.identities_unmerged) == 1
+    unmerged = third.identities_unmerged[0]
+    assert unmerged["resurrected_uuid"] == loser
+    assert unmerged["former_winner_uuid"] == winner
+
+    post = {i["uuid"]: i for i in await repo.list_all_identities()}
+    assert post[loser]["merged_into_uuid"] is None
+    assert post[winner]["merged_into_uuid"] is None
+
+
+@pytest.mark.anyio
+async def test_tick_is_idempotent_under_no_changes(repo):
+    """Running tick twice with no state changes between produces no
+    side-effects on the second run."""
+    await _seed_attacker(repo, "1.1.1.1", ja3="ja3-x")
+    await _seed_attacker(repo, "2.2.2.2", ja3="ja3-x")
+    await _seed_attacker(repo, "3.3.3.3", ja3="ja3-y")
+
+    c = ConnectedComponentsClusterer()
+    first = await c.tick(repo)
+    second = await c.tick(repo)
+    assert second.identities_formed == []
+    assert second.observations_linked == []
+    assert second.identities_merged == []
+    assert second.identities_unmerged == []
+    # Sanity: the first tick did do something.
+    assert first.identities_formed
+
+
+@pytest.mark.anyio
+async def test_tick_links_new_observation_to_existing_identity(repo):
+    """First tick: 2 attackers cluster into one identity. Second tick:
+    a new attacker with the same JA3 should get linked, not minted."""
+    a = await _seed_attacker(repo, "1.1.1.1", ja3="ja3-x")
+    b = await _seed_attacker(repo, "2.2.2.2", ja3="ja3-x")
+
+    c = ConnectedComponentsClusterer()
+    first = await c.tick(repo)
+    assert len(first.identities_formed) == 1
+    identity_uuid = first.identities_formed[0]["identity_uuid"]
+
+    # New observation arrives; same JA3.
+    d = await _seed_attacker(repo, "3.3.3.3", ja3="ja3-x")
+
+    second = await c.tick(repo)
+    # No new identity should be formed for the existing component;
+    # observation-linked should fire for the new one.
+    formed_uuids = {f["identity_uuid"] for f in second.identities_formed}
+    assert identity_uuid not in formed_uuids, (
+        "second tick must link to the existing identity, not mint a new one"
+    )
+    linked_uuids = {l_["observation_uuid"] for l_ in second.observations_linked}
+    assert d in linked_uuids
+
+
+# ─── identity fingerprint rollup ───────────────────────────────────────────
+
+
+@pytest.mark.anyio
+async def test_tick_rolls_up_fingerprint_columns_on_create(repo):
+    """A fresh-component tick must populate ja3_hashes / hassh_hashes /
+    tls_cert_sha256 on the newly-minted identity row, deduplicated and
+    sorted across all member observations."""
+    await _seed_attacker(
+        repo, "1.1.1.1", ja3="ja3-x", hassh="hassh-y", cert_sha256="ab" * 32,
+    )
+    await _seed_attacker(
+        repo, "2.2.2.2", ja3="ja3-x", hassh="hassh-y", cert_sha256="cd" * 32,
+    )
+    c = ConnectedComponentsClusterer()
+    result = await c.tick(repo)
+    assert len(result.identities_formed) == 1
+    identity_uuid = result.identities_formed[0]["identity_uuid"]
+
+    rows = {i["uuid"]: i for i in await repo.list_all_identities()}
+    identity = rows[identity_uuid]
+    assert json.loads(identity["ja3_hashes"]) == ["ja3-x"]
+    assert json.loads(identity["hassh_hashes"]) == ["hassh-y"]
+    assert json.loads(identity["tls_cert_sha256"]) == sorted(["ab" * 32, "cd" * 32])
+
+
+@pytest.mark.anyio
+async def test_tick_rolls_up_fingerprints_on_link(repo):
+    """When a new observation links into an existing identity, the
+    rollup must reflect any new cert SHA-256 it brings."""
+    await _seed_attacker(
+        repo, "1.1.1.1", ja3="ja3-x", cert_sha256="ab" * 32,
+    )
+    c = ConnectedComponentsClusterer()
+    first = await c.tick(repo)
+    identity_uuid = first.identities_formed[0]["identity_uuid"]
+
+    # New observation, same JA3, fresh cert.
+    await _seed_attacker(
+        repo, "2.2.2.2", ja3="ja3-x", cert_sha256="cd" * 32,
+    )
+    await c.tick(repo)
+
+    rows = {i["uuid"]: i for i in await repo.list_all_identities()}
+    identity = rows[identity_uuid]
+    assert json.loads(identity["tls_cert_sha256"]) == sorted(["ab" * 32, "cd" * 32])
+
+
+@pytest.mark.anyio
+async def test_tick_leaves_columns_null_when_no_fingerprints(repo):
+    """Two attackers with NO fingerprint signal cluster as separate
+    singletons; their identity rows must keep all rollup columns NULL
+    (not "[]" — NULL distinguishes 'no signal yet' from 'known empty')."""
+    await _seed_attacker(repo, "1.1.1.1")
+    await _seed_attacker(repo, "2.2.2.2")
+    c = ConnectedComponentsClusterer()
+    await c.tick(repo)
+
+    for identity in await repo.list_all_identities():
+        assert identity["ja3_hashes"] is None
+        assert identity["hassh_hashes"] is None
+        assert identity["tls_cert_sha256"] is None
+
+
+# ─── fixture-bound assertions (in-memory) ──────────────────────────────────
+
+
+def _production_clusterer_predict(corpus) -> dict[str, str]:
+    """Run the production cluster_observations over a corpus.
+
+    Mirrors the reference clusterer signature (corpus → dict) so it can
+    be passed to ``assert_fixture_bounds``. Pure / in-memory — does NOT
+    touch the DB. The DB-side path is covered by the tick tests above.
+    """
+    obs = [from_synthetic(att) for att in corpus.attackers]
+    labels = cluster_observations(obs)
+
+    # Singletons (no shared signal) get unique cluster ids so the
+    # metrics see them as distinct classes — matches the
+    # fingerprint_clusterer reference shape on lone_wolf / shared_wordlist.
+    pred: dict[str, str] = {}
+    cluster_sizes: dict[str, int] = {}
+    for cid in labels.values():
+        cluster_sizes[cid] = cluster_sizes.get(cid, 0) + 1
+    for obs_id, cid in labels.items():
+        if cluster_sizes[cid] == 1:
+            pred[obs_id] = f"cc-singleton-{obs_id}"
+        else:
+            pred[obs_id] = cid
+    return pred
+
+
+def test_lone_wolf_passes_with_production_clusterer():
+    """Fixture 3: every actor singleton. The production clusterer
+    keeps them all separate (no shared high-weight signal)."""
+    from tests.clustering.fixture_harness import assert_fixture_bounds
+    from tests.factories.campaign_factory import generate, load_yaml
+
+    corpus = generate(load_yaml(FIXTURE_DIR / "lone_wolf.yaml"), seed=0)
+    assert_fixture_bounds(
+        corpus, _production_clusterer_predict,
+        FIXTURE_DIR / "lone_wolf.expected.yaml",
+    )
+
+
+def test_shared_wordlist_passes_with_production_clusterer():
+    """Fixture 1: two campaigns sharing only credentials, divergent
+    infra. The production clusterer (high-weight edges only) keeps
+    them separate — credential overlap is not a v1 signal yet."""
+    from tests.clustering.fixture_harness import assert_fixture_bounds
+    from tests.factories.campaign_factory import generate, load_yaml
+
+    corpus = generate(load_yaml(FIXTURE_DIR / "shared_wordlist.yaml"), seed=0)
+    assert_fixture_bounds(
+        corpus, _production_clusterer_predict,
+        FIXTURE_DIR / "shared_wordlist.expected.yaml",
+    )
+
+
+def test_paused_campaign_passes_with_production_clusterer():
+    """Fixture 4: one campaign split across two operational windows by
+    a multi-day silence. Both halves share JA3 + HASSH + payload + C2;
+    the production clusterer must fold them into one identity. Time-
+    agnostic invariant: the silence window is irrelevant to clustering."""
+    from tests.clustering.fixture_harness import assert_fixture_bounds
+    from tests.factories.campaign_factory import generate, load_yaml
+
+    corpus = generate(load_yaml(FIXTURE_DIR / "paused_campaign.yaml"), seed=0)
+    assert_fixture_bounds(
+        corpus, _production_clusterer_predict,
+        FIXTURE_DIR / "paused_campaign.expected.yaml",
+    )
+
+
+def test_multi_operator_keeps_distinct_identities_with_production_clusterer():
+    """Fixture 5 at identity-level: two operators with distinct
+    JA3 + HASSH, sharing C2 + payload. The production clusterer's
+    fingerprint-disagreement veto must keep them as 2 identities."""
+    from tests.factories.campaign_factory import generate, load_yaml
+    from tests.clustering.metrics import score
+
+    corpus = generate(load_yaml(FIXTURE_DIR / "multi_operator.yaml"), seed=0)
+    pred = _production_clusterer_predict(corpus)
+    # Two distinct truth identities; the production clusterer must
+    # produce two distinct predicted clusters (no merge across
+    # fingerprint-disagreeing operators).
+    assert len(set(pred.values())) == 2
+    metrics = score(corpus.truth_labels(level="identity"), pred)
+    # Perfect identity-level recovery: ARI = 1.0, homogeneity = 1.0.
+    assert metrics["adjusted_rand_index"] == pytest.approx(1.0)
+    assert metrics["homogeneity"] == pytest.approx(1.0)
+
+
+def test_cluster_observations_credentials_alone_does_not_fuse():
+    """Two observations sharing a credential set but nothing else
+    must stay distinct. Fixture 1's failure mode in miniature."""
+    a = Observation(
+        observation_id="a",
+        credentials=frozenset({("root", "toor"), ("admin", "admin")}),
+    )
+    b = Observation(
+        observation_id="b",
+        credentials=frozenset({("root", "toor"), ("admin", "admin")}),
+    )
+    labels = cluster_observations([a, b])
+    assert labels["a"] != labels["b"]
+
+
+def test_cluster_observations_asn_alone_does_not_fuse():
+    """Two observations sharing only ASN must stay distinct.
+    Fixture 2's failure mode in miniature — VPN/proxy hopping
+    fragments ASN within a single identity, and ASN sharing
+    across identities is common; can't drive clustering."""
+    a = Observation(observation_id="a", asn=64500)
+    b = Observation(observation_id="b", asn=64500)
+    labels = cluster_observations([a, b])
+    assert labels["a"] != labels["b"]
+
+
+def test_cluster_observations_all_weak_signals_combined_does_not_fuse():
+    """Even credentials + commands + ASN together don't drive
+    clustering — only a high-tier signal does. Stack everything
+    a campaign-level F1+F2 hybrid would have, confirm singletons."""
+    a = Observation(
+        observation_id="a",
+        asn=64500,
+        credentials=frozenset({("root", "toor"), ("admin", "admin")}),
+        commands_by_phase={"discovery": ("ls", "id")},
+    )
+    b = Observation(
+        observation_id="b",
+        asn=64500,
+        credentials=frozenset({("root", "toor"), ("admin", "admin")}),
+        commands_by_phase={"discovery": ("ls", "id")},
+    )
+    labels = cluster_observations([a, b])
+    assert labels["a"] != labels["b"]
+
+
+def test_shared_wordlist_no_false_merge_at_identity_level():
+    """F1 ratchet: even at identity level (where each row is its own
+    identity), the production clusterer must not fuse credential-
+    sharing observations. Tightens the F1 bound by asserting
+    completeness == 1.0 at identity-level scoring (no truth identity
+    is split, because every row is its own truth identity)."""
+    from tests.factories.campaign_factory import generate, load_yaml
+    from tests.clustering.metrics import score
+
+    corpus = generate(load_yaml(FIXTURE_DIR / "shared_wordlist.yaml"), seed=0)
+    pred = _production_clusterer_predict(corpus)
+    metrics = score(corpus.truth_labels(level="identity"), pred)
+    # Each row must land in its own predicted cluster — anything else
+    # is a false merge driven by the credential-overlap signal.
+    assert len(set(pred.values())) == len(corpus.attackers)
+    assert metrics["homogeneity"] == pytest.approx(1.0)
+
+
+def test_vpn_hopping_asn_alone_would_have_fragmented_but_doesnt():
+    """F2 ratchet: vpn_hopping has 5 distinct ASNs across one identity.
+    A clusterer that lets ASN drive would split into 5; the production
+    clusterer doesn't because ASN is very-low-tier and JA3 / HASSH
+    are stable. Confirms tier discipline holds end-to-end."""
+    from tests.factories.campaign_factory import generate, load_yaml
+    corpus = generate(load_yaml(FIXTURE_DIR / "vpn_hopping.yaml"), seed=0)
+    pred = _production_clusterer_predict(corpus)
+    asns = {a.asn for a in corpus.attackers}
+    assert len(asns) == 5, "fixture sanity: 5 distinct ASNs"
+    # All 5 land in one cluster, not 5.
+    assert len(set(pred.values())) == 1
+
+
+def test_cluster_observations_medium_alone_does_not_fuse():
+    """Two observations sharing only command-sequence (medium-tier)
+    must stay in distinct clusters — medium is a supporting signal."""
+    a = Observation(
+        observation_id="a",
+        commands_by_phase={"discovery": ("ls", "id", "uname")},
+    )
+    b = Observation(
+        observation_id="b",
+        commands_by_phase={"discovery": ("ls", "id", "uname")},
+    )
+    labels = cluster_observations([a, b])
+    assert labels["a"] != labels["b"]
+
+
+def _build_noise_floor_corpus():
+    """Expand noise_floor.yaml's include_fixtures block into one corpus."""
+    import yaml as _yaml
+    from typing import Any
+    from tests.factories.campaign_factory import generate, load_yaml
+
+    declared = _yaml.safe_load(
+        (FIXTURE_DIR / "noise_floor.yaml").read_text(encoding="utf-8")
+    )
+    campaigns: list[dict[str, Any]] = []
+    inherited_noise = 0
+    for fname in declared["include_fixtures"]:
+        sub = load_yaml(FIXTURE_DIR / fname)
+        if "corpus" in sub:
+            campaigns.extend(sub["corpus"].get("campaigns", []))
+            inherited_noise += int(
+                (sub["corpus"].get("noise") or {}).get("scanner_count", 0)
+            )
+        else:
+            campaigns.append({"campaign": sub["campaign"]})
+    extra = int(declared.get("extra_noise_scanners", 0))
+    spec = {"corpus": {
+        "campaigns": campaigns,
+        "noise": {"scanner_count": inherited_noise + extra},
+    }}
+    return generate(spec, seed=0)
+
+
+def test_noise_floor_singleton_recall_holds_with_production_clusterer():
+    """Fixture 6 ratchet — noise floor isolation.
+
+    The load-bearing F6 invariant for the *production* clusterer:
+    truth-singleton noise scanners must not be absorbed into real
+    campaigns. A clusterer that pulls noise into campaigns dilutes
+    attribution to nothing.
+
+    Scored at *campaign* level so the truth-singleton noise scanners
+    align with the prediction (each noise row has its own truth
+    campaign id). Identity-level scoring is muddier here — see
+    ``test_noise_floor_intra_campaign_recovery`` below for the
+    constituent-campaign test that *is* identity-shaped.
+    """
+    from tests.clustering.metrics import score
+
+    corpus = _build_noise_floor_corpus()
+    pred = _production_clusterer_predict(corpus)
+    metrics = score(corpus.truth_labels(level="campaign"), pred)
+    assert metrics["singleton_recall"] >= 0.95, metrics
+
+
+def test_noise_floor_intra_campaign_recovery_with_production_clusterer():
+    """The other half of F6: real campaigns must still resolve through
+    the noise. Specifically: vpn_hopping's 5 rotations land in one
+    cluster (its identity-level signature), and shared_wordlist's two
+    distinct campaigns stay un-merged despite sharing wordlists.
+    Demonstrates the production clusterer's tier discipline holds
+    under cross-corpus interference, not just per-fixture in
+    isolation."""
+    corpus = _build_noise_floor_corpus()
+    pred = _production_clusterer_predict(corpus)
+
+    # vpn_hopping: all 5 rotation rows fold into one predicted cluster.
+    vpn_obs = [
+        a.attacker_id for a in corpus.attackers
+        if a.truth_campaign_id == "vpn-hopping-001"
+    ]
+    assert len(vpn_obs) == 5
+    vpn_clusters = {pred[oid] for oid in vpn_obs}
+    assert len(vpn_clusters) == 1, (
+        "vpn_hopping must consolidate to one cluster across rotations"
+    )
+
+    # shared_wordlist A and B: distinct fingerprints → must stay
+    # separate clusters despite shared credentials in the noise floor.
+    sw_a = [
+        a.attacker_id for a in corpus.attackers
+        if a.truth_campaign_id == "shared-wordlist-A"
+    ]
+    sw_b = [
+        a.attacker_id for a in corpus.attackers
+        if a.truth_campaign_id == "shared-wordlist-B"
+    ]
+    assert sw_a and sw_b
+    sw_a_clusters = {pred[oid] for oid in sw_a}
+    sw_b_clusters = {pred[oid] for oid in sw_b}
+    assert sw_a_clusters.isdisjoint(sw_b_clusters), (
+        "shared_wordlist A and B must not share a cluster"
+    )
+
+
+def test_slow_burn_passes_with_production_clusterer():
+    """Fixture 7 (slow_burn): one campaign across 3 multi-week operational
+    windows. Shared JA3 + HASSH + C2 across all 3 actors. The production
+    clusterer must fold them into one cluster — *despite* the multi-week
+    silence between windows. Time-agnostic invariant in action."""
+    from tests.clustering.fixture_harness import assert_fixture_bounds
+    from tests.factories.campaign_factory import generate, load_yaml
+
+    corpus = generate(load_yaml(FIXTURE_DIR / "slow_burn.yaml"), seed=0)
+    metrics = assert_fixture_bounds(
+        corpus, _production_clusterer_predict,
+        FIXTURE_DIR / "slow_burn.expected.yaml",
+    )
+    pred = _production_clusterer_predict(corpus)
+    # All three operational windows in one cluster — the F7 contract.
+    assert len(set(pred.values())) == 1
+    assert metrics["completeness"] == pytest.approx(1.0)
+
+
+def test_slow_burn_time_shift_invariance():
+    """Time-agnostic invariant in execution: shifting every observation's
+    session timestamps by an arbitrary delta must not change the
+    predicted clusters. This is the runtime counterpart of the
+    Observation-no-time-fields static check in test_similarity.py."""
+    from datetime import timedelta
+    from tests.factories.campaign_factory import generate, load_yaml
+
+    corpus = generate(load_yaml(FIXTURE_DIR / "slow_burn.yaml"), seed=0)
+    baseline = _production_clusterer_predict(corpus)
+
+    # Shift every session by +90 days (a full multi-month gap) and
+    # re-cluster. Predicted membership must be identical.
+    for att in corpus.attackers:
+        att.first_seen += timedelta(days=90)
+        att.last_seen += timedelta(days=90)
+        for s in att.sessions:
+            s.started_at += timedelta(days=90)
+
+    shifted = _production_clusterer_predict(corpus)
+    # Cluster ids may differ as opaque labels but membership groupings
+    # must match. Convert each prediction to canonical form: a set of
+    # frozensets of co-clustered observation_ids.
+    def _canonical(pred: dict[str, str]) -> set[frozenset[str]]:
+        groups: dict[str, set[str]] = {}
+        for oid, cid in pred.items():
+            groups.setdefault(cid, set()).add(oid)
+        return {frozenset(g) for g in groups.values()}
+
+    assert _canonical(baseline) == _canonical(shifted)
+
+
+def test_vpn_hopping_passes_at_identity_level_with_production_clusterer():
+    """Fixture 2: one rotating actor with stable JA3 + HASSH across
+    5 ASNs. The production clusterer must fold all 5 observations into
+    one identity (high-weight JA3 / HASSH agreement)."""
+    from tests.clustering.fixture_harness import assert_fixture_bounds
+    from tests.factories.campaign_factory import generate, load_yaml
+
+    corpus = generate(load_yaml(FIXTURE_DIR / "vpn_hopping.yaml"), seed=0)
+    metrics = assert_fixture_bounds(
+        corpus, _production_clusterer_predict,
+        FIXTURE_DIR / "vpn_hopping.expected.yaml",
+        truth_level="identity",
+    )
+    assert metrics["adjusted_rand_index"] == pytest.approx(1.0)
+    assert metrics["completeness"] == pytest.approx(1.0)
--- a/tests/clustering/test_fixtures_campaign_clusterer.py
+++ b/tests/clustering/test_fixtures_campaign_clusterer.py
@@ -0,0 +1,278 @@
+"""Run the production campaign clusterer through all 7 fixtures.
+
+The 7 fixtures' YAML bounds were tuned for *reference* clusterers
+(``c2_callback_clusterer``, ``composite_signals_clusterer``, etc.).
+The production campaign clusterer (``ConnectedComponentsCampaignClusterer``)
+is the system under test now; this module asserts it meets every
+existing bound, plus a few stricter per-fixture invariants where the
+algorithm should — by design — score perfectly.
+
+The pure path is what's exercised here: ``cluster_identities``
+operating over ``IdentityFeatures`` projected via
+``from_synthetic_identity``. Each ``SyntheticAttacker`` is treated as
+one identity (identity layer is below; the campaign clusterer reads
+identities). End-to-end DB-backed validation is in
+``test_campaign_worker.py``.
+"""
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+import pytest
+import yaml
+
+from decnet.clustering.campaign.impl.connected_components import (
+    cluster_identities,
+)
+from decnet.clustering.campaign.impl.similarity import (
+    IdentityFeatures,
+    from_synthetic_identity,
+)
+from decnet.clustering.impl.connected_components import cluster_observations
+from decnet.clustering.impl.similarity import from_synthetic
+from tests.clustering.fixture_harness import assert_fixture_bounds
+from tests.clustering.metrics import score
+from tests.factories.campaign_factory import generate, load_yaml
+
+FIXTURE_DIR = Path(__file__).parent.parent / "fixtures" / "campaigns"
+
+
+def _load_corpus(yaml_name: str) -> Any:
+    """Build the seed-0 synthetic corpus for one fixture file.
+
+    Files with an ``include_fixtures`` key are expanded into a merged spec.
+    """
+    fixture_path = FIXTURE_DIR / yaml_name
+    declared = yaml.safe_load(fixture_path.read_text(encoding="utf-8"))
+    if "include_fixtures" not in declared:
+        return generate(load_yaml(fixture_path), seed=0)
+
+    # Same expansion as tests/clustering/test_noise_floor_fixture.py —
+    # noise_floor is the only fixture that uses this format.
+    merged_campaigns: list[dict[str, Any]] = []
+    scanner_total = 0
+    for included in declared["include_fixtures"]:
+        sub_spec = load_yaml(FIXTURE_DIR / included)
+        if "corpus" not in sub_spec:
+            merged_campaigns.append({"campaign": sub_spec["campaign"]})
+            continue
+        sub_corpus = sub_spec["corpus"]
+        merged_campaigns.extend(sub_corpus.get("campaigns", []))
+        scanner_total += int((sub_corpus.get("noise") or {}).get("scanner_count", 0))
+    scanner_total += int(declared.get("extra_noise_scanners", 0))
+    merged_spec: Any = {
+        "corpus": {"campaigns": merged_campaigns, "noise": {"scanner_count": scanner_total}},
+    }
+    return generate(merged_spec, seed=0)
+
+
+def production_campaign_clusterer(corpus) -> dict[str, str]:
+    """Predict-fn adapter that chains the identity and campaign layers.
+
+    Observations are first clustered into identities; each identity's
+    members are then merged into one ``IdentityFeatures`` and the
+    identities are clustered into campaigns. The returned mapping is
+    ``{attacker_id: campaign cluster id}``, which is what the harness
+    scores.
+    """
+    attackers = corpus.attackers
+
+    # Layer 1 — identity clustering over per-attacker observations.
+    identity_of = cluster_observations([from_synthetic(att) for att in attackers])
+
+    # Bucket attackers under the identity label they were assigned.
+    members_of: dict[str, list] = {}
+    for att in attackers:
+        members_of.setdefault(identity_of[att.attacker_id], []).append(att)
+
+    # Layer 2 — one merged ``IdentityFeatures`` per identity, then
+    # campaign clustering over those.
+    merged = [_merge_features(ident, group) for ident, group in members_of.items()]
+    campaign_of = cluster_identities(merged)
+
+    # Resolve attacker → identity → campaign so the result is keyed by
+    # attacker_id.
+    resolved: dict[str, str] = {}
+    for att in attackers:
+        resolved[att.attacker_id] = campaign_of[identity_of[att.attacker_id]]
+    return resolved
+
+
+def _merge_features(identity_uuid: str, members) -> IdentityFeatures:
+    """Aggregate the members' IdentityFeatures into one for ``identity_uuid``.
+
+    Set fields are unioned and session windows concatenated. Per decky, the
+    earliest first-seen and latest last-seen win and bring their member's
+    phase along ("" when that member has none); commands are appended.
+    """
+    parts = [from_synthetic_identity(a, identity_uuid=identity_uuid) for a in members]
+
+    asn_cohort: set[int] = set()
+    payload_hashes: set[str] = set()
+    c2_endpoints: set[str] = set()
+    decky_set: set[str] = set()
+    session_windows: list[tuple[float, float]] = []
+    last_phase_per_decky: dict[str, str] = {}
+    first_phase_per_decky: dict[str, str] = {}
+    last_seen_per_decky: dict[str, float] = {}
+    first_seen_per_decky: dict[str, float] = {}
+    commands_by_phase_on_decky: dict[tuple[str, str], list[str]] = {}
+
+    for p in parts:
+        asn_cohort |= p.asn_cohort
+        payload_hashes |= p.payload_hashes
+        c2_endpoints |= p.c2_endpoints
+        decky_set |= p.decky_set
+        session_windows.extend(p.session_windows)
+        for decky, ts in p.first_seen_per_decky.items():
+            cur = first_seen_per_decky.get(decky)
+            if cur is None or ts < cur:  # strict <: a tie keeps the earlier member
+                first_seen_per_decky[decky] = ts
+                first_phase_per_decky[decky] = p.first_phase_per_decky.get(decky, "")
+        for decky, ts in p.last_seen_per_decky.items():
+            cur = last_seen_per_decky.get(decky)
+            if cur is None or ts > cur:  # strict >: a tie keeps the earlier member
+                last_seen_per_decky[decky] = ts
+                last_phase_per_decky[decky] = p.last_phase_per_decky.get(decky, "")
+        for key, cmds in p.commands_by_phase_on_decky.items():
+            commands_by_phase_on_decky.setdefault(key, []).extend(cmds)
+
+    return IdentityFeatures(
+        identity_uuid=identity_uuid,
+        asn_cohort=frozenset(asn_cohort),
+        payload_hashes=frozenset(payload_hashes),
+        c2_endpoints=frozenset(c2_endpoints),
+        decky_set=frozenset(decky_set),
+        session_windows=tuple(session_windows),
+        last_phase_per_decky=last_phase_per_decky,
+        first_phase_per_decky=first_phase_per_decky,
+        last_seen_per_decky=last_seen_per_decky,
+        first_seen_per_decky=first_seen_per_decky,
+        commands_by_phase_on_decky={
+            k: tuple(v) for k, v in commands_by_phase_on_decky.items()
+        },
+    )
+
+
+# ─── Per-fixture bound assertions ───────────────────────────────────────────
+
+
+@pytest.mark.parametrize(
+    "yaml_name,expected_name,truth_level",
+    [
+        (f"{fixture}.yaml", f"{fixture}.expected.yaml", "campaign")
+        for fixture in (
+            "lone_wolf", "shared_wordlist", "vpn_hopping", "paused_campaign",
+            "multi_operator", "noise_floor", "slow_burn",
+        )
+    ],
+)
+def test_production_campaign_clusterer_passes_fixture_bounds(
+    yaml_name: str, expected_name: str, truth_level: str,
+) -> None:
+    """Each fixture's YAML bound file must hold for the production
+    clusterer when scored at the parametrized truth level."""
+    bounds_path = FIXTURE_DIR / expected_name
+    assert_fixture_bounds(
+        _load_corpus(yaml_name),
+        production_campaign_clusterer,
+        bounds_path,
+        truth_level=truth_level,
+    )
+
+
+# ─── Per-fixture sharpness assertions (production clusterer specifics) ─────
+#
+# These tighten the YAML bounds for fixtures where the production
+# clusterer is expected to score *perfectly*. They live as Python
+# assertions (not YAML) so they only gate the production clusterer —
+# the YAML bounds stay loose for the reference-clusterer tests in the
+# per-fixture files. Ratcheting these up over time is safe; the YAML
+# bounds remain the floor that *every* tested clusterer must beat.
+
+
+def test_f3_lone_wolf_perfect_score() -> None:
+    """lone_wolf truth is all singletons; the prediction must agree exactly."""
+    corpus = _load_corpus("lone_wolf.yaml")
+    truth = corpus.truth_labels(level="campaign")
+    observed = score(truth, production_campaign_clusterer(corpus))
+    for metric_name in ("singleton_recall", "adjusted_rand_index"):
+        assert observed[metric_name] == pytest.approx(1.0)
+
+
+def test_f1_shared_wordlist_no_false_merge() -> None:
+    """Two campaigns burning the same wordlist must NOT fuse.
+
+    Only false merges are checked: every predicted cluster must map to
+    exactly one truth campaign. Whether a truth campaign is split
+    across several predicted clusters is not asserted here.
+    """
+    corpus = _load_corpus("shared_wordlist.yaml")
+    pred = production_campaign_clusterer(corpus)
+    truth = corpus.truth_labels(level="campaign")
+    # Predicted cluster id → set of truth campaigns found inside it.
+    pred_to_truth: dict[str, set[str]] = {}
+    for aid, p in pred.items():
+        pred_to_truth.setdefault(p, set()).add(truth[aid])
+    assert all(len(s) == 1 for s in pred_to_truth.values()), (
+        f"shared_wordlist: predicted cluster spans multiple campaigns: "
+        f"{pred_to_truth}"
+    )
+
+
+def test_f5_multi_operator_folds_to_one_campaign() -> None:
+    """multi_operator: every attacker must land in one predicted campaign."""
+    corpus = _load_corpus("multi_operator.yaml")
+    pred = production_campaign_clusterer(corpus)
+    distinct = len(set(pred.values()))
+    assert distinct == 1, (
+        f"multi_operator: expected 1 campaign, got {distinct} — "
+        f"predictions: {pred}"
+    )
+    truth = corpus.truth_labels(level="campaign")
+    assert score(truth, pred)["adjusted_rand_index"] == pytest.approx(1.0)
+
+
+def test_f7_slow_burn_time_shift_invariance() -> None:
+    """A uniform +90 day shift must leave the predicted partition intact.
+
+    Every attacker's first/last-seen and every session start is moved
+    by the same offset, then the corpus is re-clustered. Campaign edges
+    are pairwise-relative (the F7 invariant), so an absolute shift must
+    not regroup anything. Only the partition is compared, since cluster
+    id labels are opaque. Mirrors the identity-side check in
+    ``test_slow_burn_fixture.py``.
+    """
+    from datetime import timedelta
+
+    corpus = _load_corpus("slow_burn.yaml")
+    before = _partition(production_campaign_clusterer(corpus))
+
+    # Mutate the corpus in place: same offset on every timestamp.
+    offset = timedelta(days=90)
+    for attacker in corpus.attackers:
+        attacker.first_seen += offset
+        attacker.last_seen += offset
+        for session in attacker.sessions:
+            session.started_at += offset
+
+    # Re-cluster the shifted corpus and compare groupings.
+    after = _partition(production_campaign_clusterer(corpus))
+    assert before == after, (
+        f"slow_burn: +90d shift changed the predicted partition\n"
+        f"base: {before}\n"
+        f"shifted: {after}"
+    )
+
+
+def _partition(labels: dict[str, str]) -> set[frozenset[str]]:
+    """Collapse a ``{member: cluster_id}`` mapping into its partition.
+
+    Returns one frozenset of members per cluster id; the id strings
+    themselves are discarded, so relabelled groupings compare equal.
+    """
+    members_of: dict[str, list[str]] = {}
+    for member in labels:
+        members_of.setdefault(labels[member], []).append(member)
+    return set(map(frozenset, members_of.values()))
--- a/tests/clustering/test_lone_wolf_fixture.py
+++ b/tests/clustering/test_lone_wolf_fixture.py
@@ -0,0 +1,74 @@
+"""
+End-to-end pipeline test for fixture 3 (lone_wolf).
+
+Loads the YAML spec, runs the synthetic generator, applies the
+identity-clusterer placeholder (each attacker → its own cluster), and
+scores against the expected bounds. This is the simplest of the six
+fixtures and is deliberately the first one wired up — its ground truth
+is all singletons, so an identity clusterer trivially passes, which
+proves the DSL → factory → metrics pipeline works before any real
+algorithm is built.
+
+Once the connected-components clusterer (CAMPAIGN_CLUSTERING.md §4)
+lands, the same fixture must continue to pass.
+"""
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from tests.clustering.fixture_harness import (
+    assert_fixture_bounds,
+    identity_clusterer,
+)
+from tests.clustering.metrics import score
+from tests.factories.campaign_factory import generate, load_yaml
+
+FIXTURE_DIR = Path(__file__).parent.parent / "fixtures" / "campaigns"
+
+
+def test_lone_wolf_pipeline_passes_bounds() -> None:
+    """Identity placeholder must satisfy the lone_wolf bound file."""
+    corpus = generate(load_yaml(FIXTURE_DIR / "lone_wolf.yaml"), seed=0)
+    assert_fixture_bounds(corpus, identity_clusterer, FIXTURE_DIR / "lone_wolf.expected.yaml")
+
+
+def test_lone_wolf_corpus_shape() -> None:
+    """Shape check: 9 attackers (1 wolf + 8 noise scanners), 9 sessions,
+    and 9 distinct truth campaign ids, i.e. every attacker is alone."""
+    corpus = generate(load_yaml(FIXTURE_DIR / "lone_wolf.yaml"), seed=0)
+    attackers = corpus.attackers
+    assert len(attackers) == 9
+    assert len(corpus.sessions) == 9
+    assert len({att.truth_campaign_id for att in attackers}) == 9
+
+
+def test_identity_clusterer_fails_on_a_real_campaign() -> None:
+    """
+    Harness sanity check, NOT a test of the clusterer. An inline
+    two-actor campaign is scored against the placeholder identity
+    clusterer, which puts each attacker in its own cluster: the truth
+    campaign is fragmented, so completeness must drop below 1.0 while
+    homogeneity stays at 1.0. If completeness did not drop, the metrics
+    would be blind to false splits.
+    """
+    # Inline spec: one campaign, two actors on one ASN, one phase each.
+    actor_ids = ("a-1", "a-2")
+    phase_names = ("delivery", "discovery")
+    spec = {
+        "campaign": {
+            "id": "c-real",
+            "actors": [{"id": actor, "asn": 14061} for actor in actor_ids],
+            "phases": [
+                {"name": phase, "actor": actor}
+                for phase, actor in zip(phase_names, actor_ids)
+            ],
+            "duration_days": 1,
+        }
+    }
+    corpus = generate(spec, seed=0)
+    pred = identity_clusterer(corpus)
+    metrics = score(corpus.truth_labels(), pred)
+    assert metrics["completeness"] < 1.0
+    assert metrics["homogeneity"] == pytest.approx(1.0)
--- a/tests/clustering/test_metrics.py
+++ b/tests/clustering/test_metrics.py
@@ -0,0 +1,76 @@
+"""Sanity tests for the clustering metric harness."""
+from __future__ import annotations
+
+import pytest
+
+from tests.clustering.metrics import (
+    adjusted_rand_index,
+    completeness,
+    homogeneity,
+    score,
+    singleton_recall,
+)
+
+
+def test_perfect_agreement_scores_one() -> None:
+    """An isomorphic relabelling of the truth partition scores 1.0 on
+    every metric: cluster names carry no meaning, only the grouping."""
+    truth = {"a": "C1", "b": "C1", "c": "C2", "d": "C2"}
+    renamed = {"C1": "X", "C2": "Y"}
+    pred = {item: renamed[label] for item, label in truth.items()}
+    result = score(truth, pred)
+    all_metrics = ("adjusted_rand_index", "homogeneity", "completeness", "singleton_recall")
+    for metric_name in all_metrics:
+        assert result[metric_name] == pytest.approx(1.0)
+
+
+def test_all_singletons_perfect() -> None:
+    """All-singleton truth matched by an all-singleton prediction."""
+    items = ("a", "b", "c")
+    result = score(dict(zip(items, "ABC")), dict(zip(items, "123")))
+    for metric_name in ("singleton_recall", "adjusted_rand_index"):
+        assert result[metric_name] == pytest.approx(1.0)
+
+
+def test_false_merge_drops_homogeneity() -> None:
+    """Fusing two distinct campaigns zeroes homogeneity only."""
+    truth = {"a": "C1", "b": "C2"}
+    merged = dict.fromkeys(truth, "X")  # both campaigns in one cluster
+    assert homogeneity(truth, merged) == pytest.approx(0.0)
+    assert completeness(truth, merged) == pytest.approx(1.0)  # no class is split
+
+
+def test_false_split_drops_completeness() -> None:
+    """Splitting one campaign across two clusters zeroes completeness only."""
+    truth, split = dict.fromkeys("ab", "C1"), {"a": "X", "b": "Y"}
+    assert completeness(truth, split) == pytest.approx(0.0)
+    assert homogeneity(truth, split) == pytest.approx(1.0)
+
+
+def test_singleton_recall_penalises_noise_absorption() -> None:
+    """Singleton recall is 0.0 when every lone wolf is absorbed into one
+    big cluster and 1.0 when each wolf keeps a cluster of its own."""
+    campaign = {"c1": "C", "c2": "C"}  # the one real, two-member campaign
+    truth = {"w1": "wolf1", "w2": "wolf2", "w3": "wolf3", **campaign}
+    absorbed = {member: "BIG" for member in truth}
+    assert singleton_recall(truth, absorbed) == pytest.approx(0.0)
+    # What happens to the campaign members is irrelevant to this metric.
+    kept_apart = {"w1": "1", "w2": "2", "w3": "3", **campaign}
+    assert singleton_recall(truth, kept_apart) == pytest.approx(1.0)
+
+
+def test_mismatched_item_sets_raises() -> None:
+    """Truth and prediction keyed by different items → ValueError."""
+    pytest.raises(ValueError, adjusted_rand_index, {"a": "X"}, {"b": "Y"})
+
+
+def test_random_labels_low_ari() -> None:
+    """Chance correction: a partition built without reference to the
+    truth must score an ARI much closer to 0 than to 1."""
+    truth: dict[str, str] = {}
+    pred: dict[str, str] = {}
+    for n in range(20):
+        truth[f"i{n}"] = f"C{n // 4}"  # 5 truth classes of 4 items each
+        pred[f"i{n}"] = f"X{(n * 7) % 5}"  # 5 buckets, assigned by n * 7 mod 5
+    # Deliberately loose ceiling; the exact value is not the point.
+    assert adjusted_rand_index(truth, pred) < 0.3
--- a/tests/clustering/test_multi_operator_fixture.py
+++ b/tests/clustering/test_multi_operator_fixture.py
@@ -0,0 +1,134 @@
+"""
+End-to-end pipeline test for fixture 5 (multi_operator).
+
+One campaign, two operators with distinct UKC roles, distinct
+tooling (different JA3 + HASSH), distinct ASNs and IPs, on
+opposite shift schedules. What ties them is shared C2 callback +
+shared stage-1 payload hash — the planned similarity graph's
+"payload simhash + C2 endpoint match" arms are what should resolve
+them as one campaign.
+
+Three tests cover this (plus a harness sanity check on fingerprint_clusterer):
+
+1. `test_multi_operator_corpus_shape` — sanity: two attackers, one
+   campaign, distinct fingerprints, shared C2 callback present in
+   both rows' sessions, distinct shift hours.
+
+2. `test_multi_operator_pipeline_passes_bounds` — runs
+   `c2_callback_clusterer` (the appropriate pass-clusterer for
+   this fixture, since fingerprint_clusterer would split the two
+   distinct operators). Folds both rows into one cluster via the
+   shared C2 endpoint.
+
+3. `test_shift_clusterer_fragments_campaign` — runs the deliberately
+   bad `shift_clusterer`. Actor A on night shift and Actor B on day
+   shift split into two clusters → completeness collapses → the
+   bound floor on completeness rejects the bad clusterer. This is
+   the canonical proof that operational-schedule overlap is NOT a
+   campaign signal.
+"""
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from tests.clustering.fixture_harness import (
+    assert_fixture_bounds,
+    c2_callback_clusterer,
+    fingerprint_clusterer,
+    shift_clusterer,
+)
+from tests.clustering.metrics import score
+from tests.factories.campaign_factory import generate, load_yaml
+
+FIXTURE_DIR = Path(__file__).parent.parent / "fixtures" / "campaigns"
+FIXTURE_YAML = FIXTURE_DIR / "multi_operator.yaml"
+EXPECTED_YAML = FIXTURE_DIR / "multi_operator.expected.yaml"
+
+
+def test_multi_operator_corpus_shape() -> None:
+    """Sanity: two attackers in one truth campaign, with distinct
+    fingerprints, a shared C2 callback and disjoint shift hours."""
+    corpus = generate(load_yaml(FIXTURE_YAML), seed=0)
+    attackers = corpus.attackers
+    assert len(attackers) == 2
+    assert {a.truth_campaign_id for a in attackers} == {"multi-operator-001"}
+    # Different operators, different tools: no shared JA3 or HASSH.
+    assert len({a.ja3 for a in attackers}) == 2
+    assert len({a.hassh for a in attackers}) == 2
+
+    def _c2s(att) -> set:
+        return {s.c2_callback for s in att.sessions if s.c2_callback}
+
+    def _hours(att) -> set:
+        return {s.started_at.hour for s in att.sessions}
+
+    by_actor = {a.truth_actor_id: a for a in attackers}
+    broker, postex = by_actor["ops-broker-night"], by_actor["ops-postex-day"]
+    # The shared C2 callback shows up in both rows' sessions.
+    assert "c2.shared-op.example" in _c2s(broker)
+    assert "c2.shared-op.example" in _c2s(postex)
+    # Shifts are disjoint — load-bearing for the adversarial test.
+    assert _hours(broker) <= {22, 23, 0, 1, 2, 3}
+    assert _hours(postex) <= {9, 10, 11, 12, 13}
+
+
+def test_multi_operator_pipeline_passes_bounds() -> None:
+    """c2_callback_clusterer meets the bound file with one cluster, ARI 1.0."""
+    corpus = generate(load_yaml(FIXTURE_YAML), seed=0)
+    metrics = assert_fixture_bounds(corpus, c2_callback_clusterer, EXPECTED_YAML)
+    cluster_ids = {*c2_callback_clusterer(corpus).values()}
+    assert len(cluster_ids) == 1, (
+        "c2_callback_clusterer should fold both operators into one cluster"
+    )
+    assert metrics["adjusted_rand_index"] == pytest.approx(1.0)
+
+
+def test_fingerprint_clusterer_cannot_resolve_this_fixture() -> None:
+    """
+    Harness sanity check, NOT a test of the clusterer. The fixture
+    holds one truth campaign whose two operators have distinct
+    fingerprints, so `fingerprint_clusterer` yields 2 clusters and
+    completeness collapses to 0.0. That is *why* the fixture's
+    pass-clusterer is `c2_callback_clusterer`: the shared C2
+    callback, not the fingerprint, carries the campaign here.
+    """
+    corpus = generate(load_yaml(FIXTURE_YAML), seed=0)
+    pred = fingerprint_clusterer(corpus)
+    # Two fingerprints → two predicted clusters for one truth campaign.
+    assert len({*pred.values()}) == 2
+    truth = corpus.truth_labels(level="campaign")
+    assert score(truth, pred)["completeness"] == pytest.approx(0.0)
+
+
+def test_shift_clusterer_fragments_campaign() -> None:
+    """
+    The fixture's reason for being. Bucketing attackers by shift puts
+    the two operators into separate 'night' and 'day' clusters, while
+    the truth is a single campaign, so completeness collapses.
+
+    If `shift_clusterer` ever satisfied the bounds, the fixture would
+    have lost its discrimination power.
+    """
+    corpus = generate(load_yaml(FIXTURE_YAML), seed=0)
+    pred = shift_clusterer(corpus)
+    buckets = {*pred.values()}
+    assert buckets == {"shift-night", "shift-day"}, (
+        f"expected one night cluster + one day cluster, got {buckets}"
+    )
+
+    metrics = score(corpus.truth_labels(level="campaign"), pred)
+    assert metrics["completeness"] == pytest.approx(0.0)
+
+    # Metric floors hard-coded here; completeness must be among the breaches.
+    floors = dict(
+        adjusted_rand_index=0.85,
+        homogeneity=0.90,
+        completeness=0.80,
+        singleton_recall=0.95,
+    )
+    breaches = [name for name in floors if metrics[name] < floors[name]]
+    assert "completeness" in breaches, (
+        f"fixture failed to catch the bad clusterer; observed metrics: {metrics}"
+    )
--- a/tests/clustering/test_noise_floor_fixture.py
+++ b/tests/clustering/test_noise_floor_fixture.py
@@ -0,0 +1,167 @@
+"""
+End-to-end pipeline test for fixture 6 (noise_floor).
+
+Composite corpus: bundles all five prior fixtures' campaigns + 10
+Delivery-only noise scanners on top of lone_wolf's 8 inherited
+ones. The fixture exists to catch cross-corpus interference —
+signal collisions, factory ID re-use, clusterer ambiguity that
+shows up only when multiple campaigns are scored together. Each
+constituent fixture already ships its own in-fixture adversarial
+test; fixture 6 covers a different failure class.
+
+The composition is declared in `noise_floor.yaml` via an
+``include_fixtures`` block (a fixture-6-specific format). The
+loader in this test file expands it into a full
+``corpus.campaigns`` spec at runtime, so the factory itself stays
+unaware of the include mechanism.
+
+Three tests cover this:
+
+1. `test_noise_floor_corpus_integrity` — every constituent
+   fixture's campaigns + actors are present in the merged corpus
+   with their truth labels intact, and the 10 extra noise scanners
+   are present alongside lone_wolf's 8 (truth-singletons all).
+
+2. `test_noise_floor_pipeline_passes_bounds` — runs
+   `composite_signals_clusterer` against the merged corpus.
+   Approximates the planned similarity graph well enough that
+   every campaign resolves and every singleton stays singleton.
+   Trips the bound floors if any cross-fixture interference creeps
+   in (signal collisions across fixtures' JA3/HASSH/C2 strings).
+
+3. `test_noise_floor_singleton_recall_holds` — explicit assertion
+   that every truth-singleton (the lone wolf, the 8 inherited noise
+   scanners, the 10 extra noise scanners, and the two one-actor
+   shared_wordlist campaigns — 21 total) ends up in a singleton
+   predicted cluster. Singleton recall is the load-bearing metric.
+"""
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+import pytest
+import yaml
+
+from tests.clustering.fixture_harness import (
+    assert_fixture_bounds,
+    composite_signals_clusterer,
+)
+from tests.clustering.metrics import score
+from tests.factories.campaign_factory import generate, load_yaml
+
+FIXTURE_DIR = Path(__file__).parent.parent / "fixtures" / "campaigns"
+FIXTURE_YAML = FIXTURE_DIR / "noise_floor.yaml"
+EXPECTED_YAML = FIXTURE_DIR / "noise_floor.expected.yaml"
+
+
+def _expand_noise_floor_spec() -> dict[str, Any]:
+    """Expand noise_floor.yaml's ``include_fixtures`` list into a single
+    corpus-shaped spec for the factory's ``generate()``.
+
+    Campaigns from every included fixture are concatenated; the noise
+    scanner count is the included fixtures' total plus this file's
+    ``extra_noise_scanners``.
+    """
+    declared = yaml.safe_load(FIXTURE_YAML.read_text(encoding="utf-8"))
+    merged_campaigns: list[dict[str, Any]] = []
+    scanner_total = 0
+    for included in declared["include_fixtures"]:
+        sub_spec = load_yaml(FIXTURE_DIR / included)
+        if "corpus" not in sub_spec:
+            # Single-campaign fixture: wrap it in the corpus entry shape.
+            merged_campaigns.append({"campaign": sub_spec["campaign"]})
+            continue
+        sub_corpus = sub_spec["corpus"]
+        merged_campaigns.extend(sub_corpus.get("campaigns", []))
+        scanner_total += int((sub_corpus.get("noise") or {}).get("scanner_count", 0))
+    scanner_total += int(declared.get("extra_noise_scanners", 0))
+    return {
+        "corpus": {"campaigns": merged_campaigns, "noise": {"scanner_count": scanner_total}},
+    }
+
+
+def test_noise_floor_corpus_integrity() -> None:
+    """Merged noise_floor corpus must be complete.
+
+    Every constituent campaign id is present, there are 18 noise
+    scanners that are each their own truth campaign, and 12 attackers
+    come from real campaigns.
+    """
+    corpus = generate(_expand_noise_floor_spec(), seed=0)
+    attackers = corpus.attackers
+
+    # Every constituent fixture's campaign id appears in the merged
+    # corpus. Any missing id means the loader dropped a fixture.
+    expected_campaign_ids = {
+        "shared-wordlist-A",
+        "shared-wordlist-B",
+        "vpn-hopping-001",
+        "lone-wolf-001",
+        "paused-campaign-001",
+        "multi-operator-001",
+    }
+    missing = expected_campaign_ids - {a.truth_campaign_id for a in attackers}
+    assert not missing, f"missing campaign ids: {missing}"
+
+    # Noise scanners are recognised by their truth-campaign id prefix.
+    def _is_noise(attacker) -> bool:
+        return attacker.truth_campaign_id.startswith("noise-scanner-")
+
+    noise_attackers = [a for a in attackers if _is_noise(a)]
+    real_attackers = [a for a in attackers if not _is_noise(a)]
+
+    # Noise scanner count: 8 inherited from lone_wolf + 10 added, and
+    # each one is its own truth-campaign (singleton).
+    assert len(noise_attackers) == 18
+    assert len({a.truth_campaign_id for a in noise_attackers}) == 18
+
+    # Real-campaign attackers: 2 (shared_wordlist) + 5 (vpn_hopping) +
+    # 1 (lone_wolf wolf) + 2 (paused_campaign) + 2 (multi_operator)
+    # = 12.
+    assert len(real_attackers) == 12, (
+        f"expected 12 campaign-driven attackers, got {len(real_attackers)}"
+    )
+
+
+def test_noise_floor_pipeline_passes_bounds() -> None:
+    """composite_signals_clusterer must satisfy the noise_floor bound
+    file on the merged corpus. A perfect ARI is not required for this
+    heterogeneous corpus; the returned metrics are only re-checked
+    against two explicit floors."""
+    corpus = generate(_expand_noise_floor_spec(), seed=0)
+    metrics = assert_fixture_bounds(corpus, composite_signals_clusterer, EXPECTED_YAML)
+    for metric_name, floor in (("adjusted_rand_index", 0.85), ("singleton_recall", 0.95)):
+        assert metrics[metric_name] >= floor
+
+
+def test_noise_floor_singleton_recall_holds() -> None:
+    """Every truth-singleton (lone wolf, 18 noise scanners and the two
+    one-actor shared_wordlist campaigns: 21 in all) must stay singleton
+    under the composite clusterer; noise absorption defeats attribution.
+    """
+    spec = _expand_noise_floor_spec()
+    corpus = generate(spec, seed=0)
+    pred = composite_signals_clusterer(corpus)
+
+    truth = corpus.truth_labels(level="campaign")
+    from collections import Counter
+    truth_counts = Counter(truth.values())
+    pred_counts = Counter(pred.values())
+
+    true_singletons = [aid for aid, t in truth.items() if truth_counts[t] == 1]
+    # Truth-singletons in this composite:
+    #   1 lone wolf + 18 noise + 2 shared_wordlist actors (each
+    #   campaign has one actor; campaign size 1 means truth-singleton)
+    #   = 21.
+    assert len(true_singletons) == 21, (
+        f"expected 21 truth-singletons, got {len(true_singletons)}"
+    )
+    absorbed = [aid for aid in true_singletons if pred_counts[pred[aid]] != 1]
+    assert not absorbed, (
+        f"composite clusterer absorbed {len(absorbed)} singletons into "
+        f"larger clusters: {absorbed[:5]}…"
+    )
+
+    metrics = score(truth, pred)
+    assert metrics["singleton_recall"] == pytest.approx(1.0)
--- a/tests/clustering/test_paused_campaign_fixture.py
+++ b/tests/clustering/test_paused_campaign_fixture.py
@@ -0,0 +1,140 @@
+"""
+End-to-end pipeline test for fixture 4 (paused_campaign).
+
+One campaign, two operational windows separated by a multi-day
+silent stretch (days 3-5, 0-indexed [2, 4]). Modeled as two DSL
+actors sharing JA3 + HASSH + payload + C2 callback — the
+fingerprint-stable signals a real clusterer should resolve on.
+Their ``active_days`` differ so each row's sessions land in
+disjoint time ranges; this is what gives the adversarial
+``time_window_clusterer`` something to fragment.
+
+Three tests cover this:
+
+1. `test_paused_campaign_corpus_shape` — sanity: 2 attackers, both
+   share campaign id, sessions are time-disjoint across the pause
+   window.
+
+2. `test_paused_campaign_pipeline_passes_bounds` —
+   `fingerprint_clusterer` reference folds both rows into one
+   cluster (shared JA3 + HASSH). Trivially green at campaign-level
+   scoring; the test is a ratchet point for the real algorithm.
+
+3. `test_time_window_clusterer_fragments_campaign` — runs the
+   deliberately-bad `time_window_clusterer`. With the two windows'
+   nearest active days 4 days apart (silent days 3-5 in between)
+   and a 1-day union threshold, the two halves cannot be bridged
+   → 2 clusters → completeness collapses → bound rejected.
+"""
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from tests.clustering.fixture_harness import (
+    assert_fixture_bounds,
+    fingerprint_clusterer,
+    time_window_clusterer,
+)
+from tests.clustering.metrics import score
+from tests.factories.campaign_factory import generate, load_yaml
+
+FIXTURE_DIR = Path(__file__).parent.parent / "fixtures" / "campaigns"
+FIXTURE_YAML = FIXTURE_DIR / "paused_campaign.yaml"
+EXPECTED_YAML = FIXTURE_DIR / "paused_campaign.expected.yaml"
+
+
+def test_paused_campaign_corpus_shape() -> None:
+    spec = load_yaml(FIXTURE_YAML)
+    corpus = generate(spec, seed=0)
+    assert len(corpus.attackers) == 2
+    truth_campaigns = {a.truth_campaign_id for a in corpus.attackers}
+    assert truth_campaigns == {"paused-campaign-001"}
+    # Both rows share the operator's JA3 and HASSH — load-bearing
+    # signal for fingerprint_clusterer to fold them.
+    ja3s = {a.ja3 for a in corpus.attackers}
+    hasshs = {a.hassh for a in corpus.attackers}
+    assert len(ja3s) == 1
+    assert len(hasshs) == 1
+    # Each row's session timeline lives in its actor's active_days.
+    rows_by_actor = {a.truth_actor_id: a for a in corpus.attackers}
+    sprint_1 = rows_by_actor["ops-sprint-1"]
+    sprint_2 = rows_by_actor["ops-sprint-2"]
+    sprint_1_days = {s.started_at.day for s in sprint_1.sessions}
+    sprint_2_days = {s.started_at.day for s in sprint_2.sessions}
+    # Epoch is 2026-01-01; active_days [0,1] → calendar days 1,2;
+    # active_days [5,6] → calendar days 6,7.
+    assert sprint_1_days <= {1, 2}, f"sprint-1 leaked outside its window: {sprint_1_days}"
+    assert sprint_2_days <= {6, 7}, f"sprint-2 leaked outside its window: {sprint_2_days}"
+
+
+def test_paused_campaign_pipeline_passes_bounds() -> None:
+    spec = load_yaml(FIXTURE_YAML)
+    corpus = generate(spec, seed=0)
+    metrics = assert_fixture_bounds(corpus, fingerprint_clusterer, EXPECTED_YAML)
+    # Both rows share fingerprints → one predicted cluster.
+    pred = fingerprint_clusterer(corpus)
+    assert len(set(pred.values())) == 1
+    # Truth = 1 campaign of 2 rows; pred = 1 cluster of 2 rows → ARI 1.0.
+    assert metrics["adjusted_rand_index"] == pytest.approx(1.0)
+
+
+def test_time_window_clusterer_fragments_campaign() -> None:
+    """
+    The fixture's reason for being. With a 4-day silence between
+    the two operational windows and a 1-day union threshold, the
+    bad clusterer cannot bridge the gap. The campaign splits in
+    two and completeness collapses.
+
+    If this test ever passes (time_window_clusterer satisfies the
+    bounds), the fixture has lost its discrimination power.
+    """
+    spec = load_yaml(FIXTURE_YAML)
+    corpus = generate(spec, seed=0)
+    pred = time_window_clusterer(corpus, gap_days=1.0)
+    assert len(set(pred.values())) == 2, (
+        f"time-window clusterer should split into 2 clusters, got {len(set(pred.values()))}"
+    )
+
+    metrics = score(corpus.truth_labels(level="campaign"), pred)
+    assert metrics["completeness"] == pytest.approx(0.0)
+
+    bounds = {
+        "adjusted_rand_index": 0.85,
+        "homogeneity": 0.90,
+        "completeness": 0.80,
+        "singleton_recall": 0.95,
+    }
+    breaches = [k for k, floor in bounds.items() if metrics[k] < floor]
+    assert "completeness" in breaches, (
+        f"fixture failed to catch the bad clusterer; observed metrics: {metrics}"
+    )
+
+
+def test_time_window_clusterer_with_huge_gap_does_not_fragment() -> None:
+    """
+    Sanity for the time-window reference: with a gap larger than
+    the campaign's silent stretch, the two halves union into one.
+    Confirms the clusterer's behavior depends on the threshold,
+    not on something unrelated. (Pause is days 3-5 → max separation
+    between session ranges is ≈4 days; gap_days=10 must bridge.)
+    """
+    spec = load_yaml(FIXTURE_YAML)
+    corpus = generate(spec, seed=0)
+    pred = time_window_clusterer(corpus, gap_days=10.0)
+    assert len(set(pred.values())) == 1
+
+
+def test_silent_stretch_actually_silent() -> None:
+    """No session may land inside the configured pause window."""
+    spec = load_yaml(FIXTURE_YAML)
+    corpus = generate(spec, seed=0)
+    pause_calendar_days = {3, 4, 5}  # 1-indexed; pause_windows [[2,4]] in 0-indexed
+    leaked = [
+        s for s in corpus.sessions
+        if s.started_at.day in pause_calendar_days
+    ]
+    assert not leaked, (
+        f"sessions leaked into the silent stretch: "
+        f"{[(s.session_id, s.started_at) for s in leaked]}"
+    )
--- a/tests/clustering/test_shared_wordlist_fixture.py
+++ b/tests/clustering/test_shared_wordlist_fixture.py
@@ -0,0 +1,117 @@
+"""
+End-to-end pipeline test for fixture 1 (shared_wordlist).
+
+Two campaigns. Same SSH credential wordlist. Everything else divergent
+— ASN, IPs, JA3, HASSH, active hours.
+
+The fixture exists to defeat one specific failure mode: a clusterer
+that leans on credential-list overlap as a primary signal. Commodity
+wordlists (rockyou, defaults lists, top-1k common-credentials) are
+shared by hundreds of unrelated actors — credential overlap alone
+cannot identify a campaign.
+
+Two tests cover this:
+
+1. `test_shared_wordlist_pipeline_passes_bounds` — runs the placeholder
+   identity clusterer against the fixture. Trivially green (each
+   campaign has one actor → identity puts each in its own cluster).
+   This is the ratchet point: when the real algorithm replaces the
+   placeholder, this test must continue to pass.
+
+2. `test_credential_jaccard_clusterer_fails_homogeneity` — runs a
+   deliberately-bad clusterer that merges any two attackers whose
+   credential sets overlap above 50% Jaccard. Proves the fixture
+   actually catches what it's designed to catch: this clusterer DOES
+   merge the two campaigns, and the fixture's homogeneity floor (0.90)
+   is breached. If this test ever passes, our fixture or our metric
+   harness is broken.
+"""
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from tests.clustering.fixture_harness import (
+    assert_fixture_bounds,
+    credential_jaccard_clusterer,
+    identity_clusterer,
+)
+from tests.clustering.metrics import score
+from tests.factories.campaign_factory import generate, load_yaml
+
+FIXTURE_DIR = Path(__file__).parent.parent / "fixtures" / "campaigns"
+
+
+def test_shared_wordlist_pipeline_passes_bounds() -> None:
+    spec = load_yaml(FIXTURE_DIR / "shared_wordlist.yaml")
+    corpus = generate(spec, seed=0)
+    assert_fixture_bounds(
+        corpus, identity_clusterer, FIXTURE_DIR / "shared_wordlist.expected.yaml"
+    )
+
+
+def test_shared_wordlist_corpus_shape() -> None:
+    """Sanity: 2 campaigns × 1 actor = 2 attackers, 4 sessions
+    (delivery + credential_access × 3 sessions per campaign)."""
+    spec = load_yaml(FIXTURE_DIR / "shared_wordlist.yaml")
+    corpus = generate(spec, seed=0)
+    assert len(corpus.attackers) == 2
+    truth = corpus.truth_labels()
+    assert set(truth.values()) == {"shared-wordlist-A", "shared-wordlist-B"}
+    # Each attacker should have at least one credential_access session
+    # whose credentials_tried is the full shared list.
+    for att in corpus.attackers:
+        cred_sessions = [s for s in att.sessions if s.credentials_tried]
+        assert cred_sessions, f"attacker {att.attacker_id} has no credential sessions"
+        # All cred sessions should carry the same 8-entry wordlist.
+        for s in cred_sessions:
+            assert len(s.credentials_tried) == 8
+
+
+def test_credential_jaccard_clusterer_fails_homogeneity() -> None:
+    """
+    The fixture's reason for being. A naive clusterer that merges on
+    credential-set Jaccard ≥ 0.5 will fuse the two campaigns (Jaccard
+    = 1.0 on shared wordlists). That fusion drives homogeneity to 0
+    — exactly the failure mode the fixture protects against.
+
+    If this test ever PASSES (i.e. the bad clusterer scores high on
+    this fixture), the fixture has lost its discrimination power and
+    needs to be re-examined.
+    """
+    spec = load_yaml(FIXTURE_DIR / "shared_wordlist.yaml")
+    corpus = generate(spec, seed=0)
+    pred = credential_jaccard_clusterer(corpus, threshold=0.5)
+    metrics = score(corpus.truth_labels(), pred)
+    # The two campaigns must be merged by this clusterer.
+    assert len(set(pred.values())) == 1, (
+        "credential-Jaccard clusterer should merge both campaigns into one"
+    )
+    # And homogeneity must collapse — that's the signal a fixture-aware
+    # CI gate would use to reject the bad clusterer.
+    assert metrics["homogeneity"] == pytest.approx(0.0)
+
+
+def test_naive_clusterer_does_not_fool_the_fixture() -> None:
+    """
+    Belt-and-braces: even though the bad clusterer collapses
+    homogeneity, it might still pass *some* metrics (completeness is
+    actually 1.0 — all members of each true campaign land in the
+    single mega-cluster). The fixture's bound floor on homogeneity
+    (0.90) must reject it.
+    """
+    spec = load_yaml(FIXTURE_DIR / "shared_wordlist.yaml")
+    corpus = generate(spec, seed=0)
+    pred = credential_jaccard_clusterer(corpus, threshold=0.5)
+    metrics = score(corpus.truth_labels(), pred)
+    bounds = {
+        "adjusted_rand_index": 0.85,
+        "homogeneity": 0.90,
+        "completeness": 0.80,
+        "singleton_recall": 0.95,
+    }
+    breaches = [k for k, floor in bounds.items() if metrics[k] < floor]
+    assert "homogeneity" in breaches, (
+        f"fixture failed to catch the bad clusterer; observed metrics: {metrics}"
+    )
--- a/tests/clustering/test_similarity.py
+++ b/tests/clustering/test_similarity.py
@@ -0,0 +1,348 @@
+"""Unit tests for the similarity-graph primitives.
+
+Each edge function is tested in isolation: agreement → high score,
+disagreement → zero, missing-data → zero. Combination logic +
+thresholds live in the connected-components impl and are covered by
+the fixture suite once those land.
+"""
+from __future__ import annotations
+
+import pytest
+
+from decnet.clustering.impl.similarity import (
+    EDGE_THRESHOLD,
+    Observation,
+    combined_edge_weight,
+    from_synthetic,
+    high_weight_edge,
+    low_weight_edge,
+    medium_weight_edge,
+    very_low_weight_edge,
+)
+
+
+def _obs(**kwargs) -> Observation:
+    """Build an Observation with sensible defaults for tests."""
+    kwargs.setdefault("observation_id", "obs-x")
+    return Observation(**kwargs)
+
+
+# ─── high_weight_edge ──────────────────────────────────────────────────────
+
+
+def test_high_weight_ja3_match():
+    a = _obs(ja3="ja3-stable")
+    b = _obs(ja3="ja3-stable")
+    assert high_weight_edge(a, b) == 1.0
+
+
+def test_high_weight_hassh_match():
+    a = _obs(hassh="hassh-stable")
+    b = _obs(hassh="hassh-stable")
+    assert high_weight_edge(a, b) == 1.0
+
+
+def test_high_weight_payload_hash_overlap():
+    a = _obs(payload_hashes=frozenset({"pl-1", "pl-2"}))
+    b = _obs(payload_hashes=frozenset({"pl-2", "pl-3"}))
+    assert high_weight_edge(a, b) == 1.0
+
+
+def test_high_weight_c2_overlap():
+    a = _obs(c2_endpoints=frozenset({"c2.example.com"}))
+    b = _obs(c2_endpoints=frozenset({"c2.example.com", "c2-alt.example.com"}))
+    assert high_weight_edge(a, b) == 1.0
+
+
+def test_high_weight_no_match():
+    a = _obs(ja3="ja3-a", hassh="hassh-a", payload_hashes=frozenset({"x"}))
+    b = _obs(ja3="ja3-b", hassh="hassh-b", payload_hashes=frozenset({"y"}))
+    assert high_weight_edge(a, b) == 0.0
+
+
+def test_high_weight_both_null_ja3_does_not_match():
+    """Both-null JA3 must not be treated as 'agreement' — that would
+    fuse every un-fingerprinted noise scanner into one mega-cluster."""
+    a = _obs(ja3=None, hassh=None)
+    b = _obs(ja3=None, hassh=None)
+    assert high_weight_edge(a, b) == 0.0
+
+
+# ─── fingerprint-disagreement veto on payload / C2 ──────────────────────────
+
+
+def test_high_weight_veto_on_fingerprint_disagreement_with_shared_c2():
+    """Fixture 5 protection: two operators with distinct JA3 + HASSH
+    sharing a C2 endpoint must NOT score as identity match."""
+    a = _obs(ja3="ja3-A", hassh="hassh-A",
+             c2_endpoints=frozenset({"c2.shared.example"}))
+    b = _obs(ja3="ja3-B", hassh="hassh-B",
+             c2_endpoints=frozenset({"c2.shared.example"}))
+    assert high_weight_edge(a, b) == 0.0
+
+
+def test_high_weight_veto_on_fingerprint_disagreement_with_shared_payload():
+    """Same shape, payload signal — also vetoed."""
+    a = _obs(ja3="ja3-A", hassh="hassh-A",
+             payload_hashes=frozenset({"stage1"}))
+    b = _obs(ja3="ja3-B", hassh="hassh-B",
+             payload_hashes=frozenset({"stage1"}))
+    assert high_weight_edge(a, b) == 0.0
+
+
+def test_high_weight_no_veto_when_fingerprints_unknown():
+    """Two un-fingerprinted observations sharing C2 still cluster —
+    we don't veto without evidence of disagreement."""
+    a = _obs(c2_endpoints=frozenset({"c2.shared.example"}))
+    b = _obs(c2_endpoints=frozenset({"c2.shared.example"}))
+    assert high_weight_edge(a, b) == 1.0
+
+
+def test_high_weight_no_veto_when_one_side_unknown():
+    """One observation without fingerprints + one with — no
+    disagreement evidence, so shared C2 still clusters."""
+    a = _obs(ja3="ja3-A", hassh="hassh-A",
+             c2_endpoints=frozenset({"c2.shared.example"}))
+    b = _obs(c2_endpoints=frozenset({"c2.shared.example"}))
+    assert high_weight_edge(a, b) == 1.0
+
+
+def test_high_weight_partial_fingerprint_agreement_no_veto():
+    """JA3 agrees, HASSH disagrees → some agreement → no veto. The
+    veto only triggers on FULL disagreement."""
+    a = _obs(ja3="ja3-shared", hassh="hassh-A",
+             c2_endpoints=frozenset({"c2.shared.example"}))
+    b = _obs(ja3="ja3-shared", hassh="hassh-B",
+             c2_endpoints=frozenset({"c2.shared.example"}))
+    # JA3 agreement returns 1.0 immediately; veto never reached.
+    assert high_weight_edge(a, b) == 1.0
+
+
+def test_high_weight_partial_disagreement_one_slot_only_vetoes():
+    """One slot comparable + disagrees, other slot uncomparable
+    (one side null) → veto triggers (only available evidence is
+    disagreement)."""
+    a = _obs(ja3="ja3-A", hassh=None,
+             c2_endpoints=frozenset({"c2.shared.example"}))
+    b = _obs(ja3="ja3-B", hassh=None,
+             c2_endpoints=frozenset({"c2.shared.example"}))
+    assert high_weight_edge(a, b) == 0.0
+
+
+def test_high_weight_empty_sets_no_match():
+    a = _obs(payload_hashes=frozenset(), c2_endpoints=frozenset())
+    b = _obs(payload_hashes=frozenset(), c2_endpoints=frozenset())
+    assert high_weight_edge(a, b) == 0.0
+
+
+# ─── medium_weight_edge ────────────────────────────────────────────────────
+
+
+def test_medium_weight_jaccard_full_match_in_one_phase():
+    a = _obs(commands_by_phase={"discovery": ("ls", "id", "uname -a")})
+    b = _obs(commands_by_phase={"discovery": ("ls", "id", "uname -a")})
+    assert medium_weight_edge(a, b) == pytest.approx(1.0)
+
+
+def test_medium_weight_jaccard_partial_match():
+    a = _obs(commands_by_phase={"discovery": ("ls", "id", "uname -a", "whoami")})
+    b = _obs(commands_by_phase={"discovery": ("ls", "id")})
+    # |A∩B|=2, |A∪B|=4 → 0.5
+    assert medium_weight_edge(a, b) == pytest.approx(0.5)
+
+
+def test_medium_weight_picks_max_across_phases():
+    a = _obs(commands_by_phase={
+        "discovery": ("ls",),
+        "exploitation": ("./payload", "chmod +x payload"),
+    })
+    b = _obs(commands_by_phase={
+        "discovery": ("ps",),  # 0.0
+        "exploitation": ("./payload", "chmod +x payload"),  # 1.0
+    })
+    assert medium_weight_edge(a, b) == pytest.approx(1.0)
+
+
+def test_medium_weight_no_shared_phase_returns_zero():
+    a = _obs(commands_by_phase={"discovery": ("ls",)})
+    b = _obs(commands_by_phase={"exploitation": ("./payload",)})
+    assert medium_weight_edge(a, b) == 0.0
+
+
+def test_medium_weight_disjoint_commands_in_shared_phase():
+    a = _obs(commands_by_phase={"discovery": ("ls",)})
+    b = _obs(commands_by_phase={"discovery": ("ps",)})
+    # |A∩B|=0, |A∪B|=2
+    assert medium_weight_edge(a, b) == 0.0
+
+
+def test_medium_weight_empty_corpora_returns_zero():
+    a = _obs()
+    b = _obs()
+    assert medium_weight_edge(a, b) == 0.0
+
+
+# ─── low_weight_edge ───────────────────────────────────────────────────────
+
+
+def test_low_weight_credential_jaccard_match():
+    a = _obs(credentials=frozenset({("root", "toor"), ("admin", "admin")}))
+    b = _obs(credentials=frozenset({("root", "toor"), ("admin", "admin")}))
+    assert low_weight_edge(a, b) == pytest.approx(1.0)
+
+
+def test_low_weight_credential_partial_overlap():
+    a = _obs(credentials=frozenset({("root", "toor"), ("admin", "admin")}))
+    b = _obs(credentials=frozenset({("root", "toor"), ("user", "user")}))
+    assert low_weight_edge(a, b) == pytest.approx(1 / 3)
+
+
+def test_low_weight_no_credentials_returns_zero():
+    a = _obs()
+    b = _obs(credentials=frozenset({("root", "toor")}))
+    assert low_weight_edge(a, b) == 0.0
+
+
+# ─── very_low_weight_edge ──────────────────────────────────────────────────
+
+
+def test_very_low_weight_asn_match():
+    a = _obs(asn=64500)
+    b = _obs(asn=64500)
+    assert very_low_weight_edge(a, b) == 1.0
+
+
+def test_very_low_weight_asn_mismatch():
+    a = _obs(asn=64500)
+    b = _obs(asn=64501)
+    assert very_low_weight_edge(a, b) == 0.0
+
+
+def test_very_low_weight_asn_null_returns_zero():
+    a = _obs(asn=None)
+    b = _obs(asn=64500)
+    assert very_low_weight_edge(a, b) == 0.0
+
+
+# ─── time-agnostic invariant ───────────────────────────────────────────────
+
+
+def test_observations_carry_no_timestamps():
+    """Compile-time guarantee: Observation has no time fields, so no
+    edge function can accidentally start using them. Fixture 7 forbids
+    recency-decay clustering."""
+    field_names = set(Observation.__dataclass_fields__.keys())
+    forbidden = {"first_seen", "last_seen", "started_at", "session_midpoint", "timestamp"}
+    assert field_names.isdisjoint(forbidden), (
+        f"Observation grew time fields: {field_names & forbidden}. "
+        "Fixture 7 (slow_burn) forbids recency-aware clustering."
+    )
+
+
+# ─── from_synthetic adapter (exercised by test_from_synthetic_round_trip, the last test in this module) ───
+
+
+# ─── combined_edge_weight tier discipline ─────────────────────────────────
+
+
+def test_combined_high_alone_crosses_threshold():
+    a = _obs(ja3="ja3-shared")
+    b = _obs(ja3="ja3-shared")
+    assert combined_edge_weight(a, b) >= EDGE_THRESHOLD
+
+
+def test_combined_medium_alone_below_threshold():
+    """Single medium-tier match must NOT cluster — medium is a
+    supporting signal, never a clustering driver on its own."""
+    a = _obs(commands_by_phase={"discovery": ("ls", "id", "uname")})
+    b = _obs(commands_by_phase={"discovery": ("ls", "id", "uname")})
+    weight = combined_edge_weight(a, b)
+    assert 0 < weight < EDGE_THRESHOLD
+
+
+def test_combined_low_alone_below_threshold():
+    """Credential-only overlap must NOT cluster — fixture 1's failure mode."""
+    a = _obs(credentials=frozenset({("root", "toor"), ("admin", "admin")}))
+    b = _obs(credentials=frozenset({("root", "toor"), ("admin", "admin")}))
+    weight = combined_edge_weight(a, b)
+    assert 0 < weight < EDGE_THRESHOLD
+
+
+def test_combined_very_low_alone_below_threshold():
+    """ASN-only overlap must NOT cluster — fixture 2's failure mode."""
+    a = _obs(asn=64500)
+    b = _obs(asn=64500)
+    weight = combined_edge_weight(a, b)
+    assert 0 < weight < EDGE_THRESHOLD
+
+
+def test_combined_all_weak_tiers_still_below_threshold():
+    """Even all three weaker tiers stacked don't reach threshold —
+    only a high-tier signal does."""
+    a = _obs(
+        asn=64500,
+        credentials=frozenset({("root", "toor")}),
+        commands_by_phase={"discovery": ("ls",)},
+    )
+    b = _obs(
+        asn=64500,
+        credentials=frozenset({("root", "toor")}),
+        commands_by_phase={"discovery": ("ls",)},
+    )
+    # 0.6*1.0 (medium) + 0.2*1.0 (low) + 0.05*1.0 (very_low) = 0.85
+    weight = combined_edge_weight(a, b)
+    assert weight < EDGE_THRESHOLD
+
+
+def test_combined_high_plus_medium_clusters():
+    a = _obs(ja3="ja3-x", commands_by_phase={"discovery": ("ls",)})
+    b = _obs(ja3="ja3-x", commands_by_phase={"discovery": ("ls",)})
+    assert combined_edge_weight(a, b) >= EDGE_THRESHOLD
+
+
+def test_combined_no_signal_returns_zero():
+    a = _obs()
+    b = _obs()
+    assert combined_edge_weight(a, b) == 0.0
+
+
+def test_from_synthetic_round_trip():
+    """The adapter projects a SyntheticAttacker into an Observation
+    that the edge functions can score over."""
+    from datetime import datetime, timezone
+    from tests.factories.campaign_factory import (
+        SyntheticAttacker, SyntheticSession,
+    )
+    from decnet.clustering.ukc import UKCPhase
+
+    now = datetime.now(timezone.utc)
+    sess = SyntheticSession(
+        session_id="s1",
+        attacker_id="a1",
+        decky_id="d1",
+        started_at=now,
+        duration_s=10.0,
+        phase=UKCPhase.DISCOVERY,
+        commands=["ls", "id"],
+        credentials_tried=[("root", "toor")],
+        payload_hash="pl-1",
+        c2_callback="c2.example.com",
+        truth_campaign_id="c1",
+        truth_actor_id="actor-1",
+    )
+    att = SyntheticAttacker(
+        attacker_id="a1", ip="1.1.1.1", asn=64500,
+        ja3="ja3-x", hassh="hassh-y",
+        first_seen=now, last_seen=now,
+        truth_campaign_id="c1", truth_actor_id="actor-1",
+        sessions=[sess],
+    )
+    obs = from_synthetic(att)
+    assert obs.observation_id == "a1"
+    assert obs.ja3 == "ja3-x"
+    assert obs.hassh == "hassh-y"
+    assert obs.asn == 64500
+    assert obs.payload_hashes == frozenset({"pl-1"})
+    assert obs.c2_endpoints == frozenset({"c2.example.com"})
+    assert obs.credentials == frozenset({("root", "toor")})
+    assert obs.commands_by_phase == {"discovery": ("ls", "id")}
--- a/tests/clustering/test_slow_burn_fixture.py
+++ b/tests/clustering/test_slow_burn_fixture.py
@@ -0,0 +1,128 @@
+"""
+End-to-end pipeline test for fixture 7 (slow_burn).
+
+90-day APT campaign with three operational windows separated by
+multi-week silences. Models the real operational tempo of an APT
+working a deep nested topology (MazeNET-style): recon over weeks,
+exploitation later, action-on-objectives later still. The unique
+signal this fixture stresses is TIME-AGNOSTIC IDENTITY — a
+clusterer that silently expires old edges fragments any campaign
+that operates over months.
+
+Three tests cover this:
+
+1. `test_slow_burn_corpus_shape` — sanity: 3 attackers, all share
+   campaign id and operator fingerprint, sessions land in their
+   respective operational windows.
+
+2. `test_slow_burn_pipeline_passes_bounds` —
+   `composite_signals_clusterer` (fingerprint OR C2 — time-agnostic)
+   folds all three windows into one cluster.
+
+3. `test_recency_decay_clusterer_fragments_campaign` — runs the
+   deliberately-bad `recency_decay_clusterer` with a 14-day half-
+   life and a 0.5 weight threshold. Edges between adjacent
+   operational windows (24+ days apart) decay below threshold and
+   drop. The campaign splits into three clusters; completeness
+   collapses; the bound floor rejects the bad clusterer.
+"""
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from tests.clustering.fixture_harness import (
+    assert_fixture_bounds,
+    composite_signals_clusterer,
+    recency_decay_clusterer,
+)
+from tests.clustering.metrics import score
+from tests.factories.campaign_factory import generate, load_yaml
+
+FIXTURE_DIR = Path(__file__).parent.parent / "fixtures" / "campaigns"
+FIXTURE_YAML = FIXTURE_DIR / "slow_burn.yaml"
+EXPECTED_YAML = FIXTURE_DIR / "slow_burn.expected.yaml"
+
+
+def test_slow_burn_corpus_shape() -> None:
+    spec = load_yaml(FIXTURE_YAML)
+    corpus = generate(spec, seed=0)
+    assert len(corpus.attackers) == 3
+    truth_campaigns = {a.truth_campaign_id for a in corpus.attackers}
+    assert truth_campaigns == {"slow-burn-001"}
+    # Operator fingerprint stays stable across all three windows.
+    ja3s = {a.ja3 for a in corpus.attackers}
+    hasshs = {a.hassh for a in corpus.attackers}
+    assert len(ja3s) == 1
+    assert len(hasshs) == 1
+    # Each row's sessions land in its operational window.
+    by_actor = {a.truth_actor_id: a for a in corpus.attackers}
+    recon_days = {s.started_at.timetuple().tm_yday for s in by_actor["ops-recon"].sessions}
+    exploit_days = {s.started_at.timetuple().tm_yday for s in by_actor["ops-exploit"].sessions}
+    action_days = {s.started_at.timetuple().tm_yday for s in by_actor["ops-action"].sessions}
+    # Epoch is 2026-01-01 (day-of-year 1). active_days [7-11] →
+    # day-of-year [8-12]; [35-39] → [36-40]; [75-79] → [76-80].
+    assert recon_days <= {8, 9, 10, 11, 12}, recon_days
+    assert exploit_days <= {36, 37, 38, 39, 40}, exploit_days
+    assert action_days <= {76, 77, 78, 79, 80}, action_days
+
+
+def test_slow_burn_pipeline_passes_bounds() -> None:
+    spec = load_yaml(FIXTURE_YAML)
+    corpus = generate(spec, seed=0)
+    metrics = assert_fixture_bounds(corpus, composite_signals_clusterer, EXPECTED_YAML)
+    pred = composite_signals_clusterer(corpus)
+    assert len(set(pred.values())) == 1, (
+        "composite_signals_clusterer should fold all three windows into one cluster"
+    )
+    assert metrics["adjusted_rand_index"] == pytest.approx(1.0)
+
+
+def test_recency_decay_clusterer_fragments_campaign() -> None:
+    """
+    The fixture's reason for being. Recency decay with a 14-day
+    half-life expires edges between operational windows that are
+    24+ days apart, dropping their weight below the 0.5 threshold.
+    The campaign fragments into three clusters; completeness
+    collapses.
+
+    If this test ever passes (the bad clusterer satisfies the
+    bounds), the fixture has lost its discrimination power.
+    """
+    spec = load_yaml(FIXTURE_YAML)
+    corpus = generate(spec, seed=0)
+    pred = recency_decay_clusterer(corpus, half_life_days=14.0, threshold=0.5)
+    assert len(set(pred.values())) == 3, (
+        f"recency-decay clusterer should split into 3 clusters, "
+        f"got {len(set(pred.values()))}"
+    )
+
+    metrics = score(corpus.truth_labels(level="campaign"), pred)
+    assert metrics["completeness"] == pytest.approx(0.0)
+
+    bounds = {
+        "adjusted_rand_index": 0.85,
+        "homogeneity": 0.90,
+        "completeness": 0.80,
+        "singleton_recall": 0.95,
+    }
+    breaches = [k for k, floor in bounds.items() if metrics[k] < floor]
+    assert "completeness" in breaches, (
+        f"fixture failed to catch the bad clusterer; observed metrics: {metrics}"
+    )
+
+
+def test_recency_decay_clusterer_with_long_halflife_does_not_fragment() -> None:
+    """
+    Sanity for the recency-decay reference: with a half-life longer
+    than the campaign duration, every edge survives the decay. The
+    three windows union into one. Confirms the clusterer's
+    behavior depends on the half-life parameter, not on something
+    unrelated. (Half-life 365 → edges across 40 days decay to
+    ~0.93, well above the 0.5 threshold.)
+    """
+    spec = load_yaml(FIXTURE_YAML)
+    corpus = generate(spec, seed=0)
+    pred = recency_decay_clusterer(corpus, half_life_days=365.0, threshold=0.5)
+    assert len(set(pred.values())) == 1
--- a/tests/clustering/test_vpn_hopping_fixture.py
+++ b/tests/clustering/test_vpn_hopping_fixture.py
@@ -0,0 +1,126 @@
+"""
+End-to-end pipeline test for fixture 2 (vpn_hopping).
+
+One campaign, one actor, ip_pool: rotating across 5 distinct ASNs.
+JA3, HASSH, and payload_hash stable across every rotation. The
+fixture is the canonical "same hands, different IP/ASN" scenario
+that motivates Identity Resolution (see development/
+IDENTITY_RESOLUTION.md — these are the signals "the attacker can't
+cheaply rotate"). It also stresses the clusterer's weighting of
+ASN: the real similarity graph weights ASN match "very low" because
+VPN/proxy hopping shatters ASN within a single identity.
+
+Three tests cover this:
+
+1. `test_vpn_hopping_pipeline_passes_bounds_at_campaign_level` —
+   `fingerprint_clusterer` reference folds all 5 rotated rows into
+   one cluster (shared JA3 + HASSH). Trivially green at campaign-
+   level scoring; the test is a ratchet point for the real algorithm
+   to keep passing once it lands.
+
+2. `test_vpn_hopping_pipeline_passes_bounds_at_identity_level` —
+   same clusterer, scored against the identity-level oracle. Verifies
+   the factory's `truth_identity_id` plumbing across rotated rows
+   (commit f6b8375) actually expresses the right ground truth: 5
+   observations → 1 identity.
+
+3. `test_asn_clusterer_fragments_campaign` — runs the deliberately-
+   bad `asn_clusterer` reference. The 5 rotation_asns become 5
+   singleton clusters → completeness collapses to ~0, ARI collapses,
+   and the fixture's bound floor on completeness (0.80) rejects the
+   bad clusterer. If the bad clusterer ever satisfies the bounds
+   (i.e. this test starts failing), the fixture has lost its
+   discrimination power.
+"""
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from tests.clustering.fixture_harness import (
+    asn_clusterer,
+    assert_fixture_bounds,
+    fingerprint_clusterer,
+)
+from tests.clustering.metrics import score
+from tests.factories.campaign_factory import generate, load_yaml
+
+# Fixture files are resolved relative to this test module: the parent of
+# this file's directory, then fixtures/campaigns. FIXTURE_YAML is the
+# corpus spec; EXPECTED_YAML is its paired bounds file.
+FIXTURE_DIR = Path(__file__).parent.parent / "fixtures" / "campaigns"
+FIXTURE_YAML = FIXTURE_DIR / "vpn_hopping.yaml"
+EXPECTED_YAML = FIXTURE_DIR / "vpn_hopping.expected.yaml"
+
+
+def test_vpn_hopping_corpus_shape() -> None:
+    """
+    Check the generated corpus before any clustering runs.
+
+    Expects 5 observation rows that all belong to campaign
+    "vpn-hopping-001" and actor "hopper-a", share one truth identity,
+    cover 5 distinct ASNs and 5 distinct IPs, and carry a single JA3
+    and a single HASSH value.
+    """
+    corpus = generate(load_yaml(FIXTURE_YAML), seed=0)
+
+    def distinct(field: str) -> set:
+        # Set of values the named attribute takes across all attacker rows.
+        return {getattr(row, field) for row in corpus.attackers}
+
+    assert len(corpus.attackers) == 5
+
+    campaign_ids = distinct("truth_campaign_id")
+    identity_ids = distinct("truth_identity_id")
+    actor_ids = distinct("truth_actor_id")
+    assert campaign_ids == {"vpn-hopping-001"}
+    assert len(identity_ids) == 1, "all 5 rotations must share one truth_identity_id"
+    assert actor_ids == {"hopper-a"}
+
+    assert distinct("asn") == {64512, 64513, 64514, 64515, 64516}
+    assert len(distinct("ip")) == 5, "rotation must produce 5 distinct IPs"
+
+    # Fingerprints must not vary across rotations — the load-bearing signal.
+    ja3_values = distinct("ja3")
+    hassh_values = distinct("hassh")
+    assert len(ja3_values) == 1
+    assert len(hassh_values) == 1
+
+
+def test_vpn_hopping_pipeline_passes_bounds_at_campaign_level() -> None:
+    """
+    Run `fingerprint_clusterer` on the vpn_hopping corpus and check it
+    against the bounds in EXPECTED_YAML, using `assert_fixture_bounds`
+    with its default truth level.
+    """
+    corpus = generate(load_yaml(FIXTURE_YAML), seed=0)
+    assert_fixture_bounds(corpus, fingerprint_clusterer, EXPECTED_YAML)
+
+
+def test_vpn_hopping_pipeline_passes_bounds_at_identity_level() -> None:
+    """
+    Same clusterer and bounds file as the campaign-level test, scored
+    against identity-level truth labels (`truth_level="identity"`).
+
+    Beyond the bounds check, ARI and completeness must both be ~1.0:
+    all 5 observations are expected to land in one predicted cluster
+    and to share one truth identity.
+    """
+    corpus = generate(load_yaml(FIXTURE_YAML), seed=0)
+    metrics = assert_fixture_bounds(
+        corpus,
+        fingerprint_clusterer,
+        EXPECTED_YAML,
+        truth_level="identity",
+    )
+    # One predicted cluster matching one truth identity → perfect agreement.
+    for metric_name in ("adjusted_rand_index", "completeness"):
+        assert metrics[metric_name] == pytest.approx(1.0)
+
+
+def test_asn_clusterer_fragments_campaign() -> None:
+    """
+    Negative control: grouping by ASN must fail this fixture.
+
+    The rotation spans 5 distinct ASNs, so `asn_clusterer` is expected
+    to shatter the campaign into 5 clusters. Completeness goes to 0
+    because the one true class is split across 5 predicted clusters,
+    and ARI collapses with it. The 0.80 floor on completeness must
+    reject this result.
+
+    If asn_clusterer ever satisfies the bounds (i.e. this test starts
+    failing), the fixture has lost its discrimination power.
+    """
+    corpus = generate(load_yaml(FIXTURE_YAML), seed=0)
+    pred = asn_clusterer(corpus)
+
+    # One predicted cluster per distinct ASN in the rotation.
+    predicted_clusters = set(pred.values())
+    assert len(predicted_clusters) == 5
+
+    metrics = score(corpus.truth_labels(level="campaign"), pred)
+    # The failure mode the fixture guards against: completeness ~0.
+    assert metrics["completeness"] == pytest.approx(0.0)
+    # The two partitions barely agree, so ARI is near zero as well.
+    assert metrics["adjusted_rand_index"] < 0.1
+
+    # Minimum acceptable value per metric; anything below its floor is
+    # recorded as a breach, and completeness must be among them.
+    floors = {
+        "adjusted_rand_index": 0.85,
+        "homogeneity": 0.90,
+        "completeness": 0.80,
+        "singleton_recall": 0.95,
+    }
+    breaches = [name for name in floors if metrics[name] < floors[name]]
+    assert "completeness" in breaches, (
+        f"fixture failed to catch the bad clusterer; observed metrics: {metrics}"
+    )