merge: testing → main (reconcile 2-week divergence)
This commit is contained in:
117
tests/clustering/test_shared_wordlist_fixture.py
Normal file
117
tests/clustering/test_shared_wordlist_fixture.py
Normal file
@@ -0,0 +1,117 @@
|
||||
"""
|
||||
End-to-end pipeline test for fixture 1 (shared_wordlist).
|
||||
|
||||
Two campaigns. Same SSH credential wordlist. Everything else divergent
|
||||
— ASN, IPs, JA3, HASSH, active hours.
|
||||
|
||||
The fixture exists to defeat one specific failure mode: a clusterer
|
||||
that leans on credential-list overlap as a primary signal. Commodity
|
||||
wordlists (rockyou, defaults lists, top-1k common-credentials) are
|
||||
shared by hundreds of unrelated actors — credential overlap alone
|
||||
cannot identify a campaign.
|
||||
|
||||
Two main tests cover this (the other tests in this file are supporting checks):
|
||||
|
||||
1. `test_shared_wordlist_pipeline_passes_bounds` — runs the placeholder
|
||||
identity clusterer against the fixture. Trivially green (each
|
||||
campaign has one actor → identity puts each in its own cluster).
|
||||
This is the ratchet point: when the real algorithm replaces the
|
||||
placeholder, this test must continue to pass.
|
||||
|
||||
2. `test_credential_jaccard_clusterer_fails_homogeneity` — runs a
|
||||
deliberately-bad clusterer that merges any two attackers whose
|
||||
credential sets overlap above 50% Jaccard. Proves the fixture
|
||||
actually catches what it's designed to catch: this clusterer DOES
|
||||
merge the two campaigns, and the fixture's homogeneity floor (0.90)
|
||||
is breached. If this test ever fails, our fixture or our metric
|
||||
harness is broken.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from tests.clustering.fixture_harness import (
|
||||
assert_fixture_bounds,
|
||||
credential_jaccard_clusterer,
|
||||
identity_clusterer,
|
||||
)
|
||||
from tests.clustering.metrics import score
|
||||
from tests.factories.campaign_factory import generate, load_yaml
|
||||
|
||||
# Campaign fixture directory: "fixtures/campaigns" under the directory two
# levels above this file.
FIXTURE_DIR = Path(__file__).parent.parent.joinpath("fixtures", "campaigns")
|
||||
|
||||
|
||||
def test_shared_wordlist_pipeline_passes_bounds() -> None:
    """Run the identity clusterer over the shared-wordlist fixture.

    The corpus is generated from ``shared_wordlist.yaml`` with ``seed=0``
    and handed to ``assert_fixture_bounds`` together with the fixture's
    ``shared_wordlist.expected.yaml`` file.
    """
    fixture_spec = load_yaml(FIXTURE_DIR / "shared_wordlist.yaml")
    generated = generate(fixture_spec, seed=0)
    expected_path = FIXTURE_DIR / "shared_wordlist.expected.yaml"
    assert_fixture_bounds(generated, identity_clusterer, expected_path)
|
||||
|
||||
|
||||
def test_shared_wordlist_corpus_shape() -> None:
    """Sanity-check the shape of the generated shared-wordlist corpus.

    Asserts that the corpus holds exactly two attackers, that the truth
    labels are exactly ``shared-wordlist-A`` and ``shared-wordlist-B``,
    and that every attacker has at least one session with credentials,
    each such session carrying 8 entries.

    NOTE(review): the earlier version of this docstring also quoted
    per-campaign session counts; those counts are not asserted here.
    """
    expected_campaigns = {"shared-wordlist-A", "shared-wordlist-B"}
    wordlist_size = 8

    fixture_spec = load_yaml(FIXTURE_DIR / "shared_wordlist.yaml")
    corpus = generate(fixture_spec, seed=0)

    assert len(corpus.attackers) == 2
    labels = corpus.truth_labels()
    assert set(labels.values()) == expected_campaigns

    for att in corpus.attackers:
        # Sessions whose credentials_tried is empty/falsy are ignored.
        with_credentials = [sess for sess in att.sessions if sess.credentials_tried]
        assert with_credentials, f"attacker {att.attacker_id} has no credential sessions"
        # Only the wordlist length is checked; entries are not compared
        # across sessions or attackers.
        for sess in with_credentials:
            assert len(sess.credentials_tried) == wordlist_size
|
||||
|
||||
|
||||
def test_credential_jaccard_clusterer_fails_homogeneity() -> None:
    """
    The fixture's reason for being. A naive clusterer that merges on
    credential-set Jaccard ≥ 0.5 will fuse the two campaigns (Jaccard
    = 1.0 on shared wordlists). That fusion drives homogeneity to 0
    — exactly the failure mode the fixture protects against.

    This test PASSES when the bad clusterer collapses as described.
    If it ever FAILS (i.e. the bad clusterer keeps the campaigns apart
    or scores a non-zero homogeneity on this fixture), the fixture has
    lost its discrimination power and needs to be re-examined.
    """
    spec = load_yaml(FIXTURE_DIR / "shared_wordlist.yaml")
    corpus = generate(spec, seed=0)
    pred = credential_jaccard_clusterer(corpus, threshold=0.5)
    metrics = score(corpus.truth_labels(), pred)
    # The two campaigns must be merged by this clusterer.
    assert len(set(pred.values())) == 1, (
        "credential-Jaccard clusterer should merge both campaigns into one"
    )
    # And homogeneity must collapse — that's the signal a fixture-aware
    # CI gate would use to reject the bad clusterer.
    assert metrics["homogeneity"] == pytest.approx(0.0), (
        f"expected homogeneity to collapse to 0; observed metrics: {metrics}"
    )
|
||||
|
||||
|
||||
def test_naive_clusterer_does_not_fool_the_fixture() -> None:
    """
    Belt-and-braces check on the metric floors.

    The credential-Jaccard clusterer may still satisfy some floors
    (the original note states completeness is 1.0 once everything
    lands in one mega-cluster), so this test compares every metric
    against its floor and requires that homogeneity (floor 0.90) is
    among the breached ones.
    """
    # Per-metric floors; a metric strictly below its floor is a breach.
    floors = {
        "adjusted_rand_index": 0.85,
        "homogeneity": 0.90,
        "completeness": 0.80,
        "singleton_recall": 0.95,
    }

    fixture_spec = load_yaml(FIXTURE_DIR / "shared_wordlist.yaml")
    corpus = generate(fixture_spec, seed=0)
    predicted = credential_jaccard_clusterer(corpus, threshold=0.5)
    metrics = score(corpus.truth_labels(), predicted)

    breaches = []
    for metric_name, floor in floors.items():
        if metrics[metric_name] < floor:
            breaches.append(metric_name)

    assert "homogeneity" in breaches, (
        f"fixture failed to catch the bad clusterer; observed metrics: {metrics}"
    )
|
||||
Reference in New Issue
Block a user