test(clustering): F7 slow-burn time-agnostic invariant
Fixture 7 ratchet: one campaign across 3 multi-week operational windows with stable JA3 + HASSH + C2. The production clusterer must fold all 3 into one cluster despite multi-week silence between windows; completeness = 1.0. Time-shift invariance test: applying a +90 day delta to every session start (and the per-attacker first/last seen) must produce the same cluster membership as the baseline. This is the runtime counterpart of the static no-time-fields check on Observation. If either check ever fails, the clusterer has accidentally grown a recency-aware edge — exactly the regression fixture 7 exists to catch.
This commit is contained in:
@@ -414,6 +414,57 @@ def test_cluster_observations_medium_alone_does_not_fuse():
|
|||||||
assert labels["a"] != labels["b"]
|
assert labels["a"] != labels["b"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_slow_burn_passes_with_production_clusterer():
    """Fixture 7 (slow_burn): a single campaign spread over three
    multi-week operational windows, with JA3 + HASSH + C2 shared by all
    three actors.

    The production clusterer has to place every window in the same
    cluster even though the windows are separated by weeks of silence:
    the time-agnostic invariant exercised end to end.
    """
    from tests.clustering.fixture_harness import assert_fixture_bounds
    from tests.factories.campaign_factory import generate, load_yaml

    spec = load_yaml(FIXTURE_DIR / "slow_burn.yaml")
    campaign = generate(spec, seed=0)
    expected_path = FIXTURE_DIR / "slow_burn.expected.yaml"

    # The harness runs the predictor itself and checks the fixture's
    # expected bounds; it hands back the metrics it computed.
    bounds_metrics = assert_fixture_bounds(
        campaign,
        _production_clusterer_predict,
        expected_path,
    )

    # The F7 contract: every operational window carries one cluster id.
    assignments = _production_clusterer_predict(campaign)
    distinct_clusters = set(assignments.values())
    assert len(distinct_clusters) == 1
    assert bounds_metrics["completeness"] == pytest.approx(1.0)
|
||||||
|
|
||||||
|
|
||||||
|
def test_slow_burn_time_shift_invariance():
    """Time-agnostic invariant in execution: shifting every observation's
    session timestamps by an arbitrary delta must not change the
    predicted clusters.

    This is the runtime counterpart of the Observation-no-time-fields
    static check in test_similarity.py. The corpus is clustered once,
    every attacker's first/last-seen and every session start is moved by
    the same delta in place, and the corpus is clustered again; the two
    membership partitions must be equal.
    """
    from datetime import timedelta

    from tests.factories.campaign_factory import generate, load_yaml

    corpus = generate(load_yaml(FIXTURE_DIR / "slow_burn.yaml"), seed=0)
    baseline = _production_clusterer_predict(corpus)
    # Without this guard an empty prediction would compare equal to
    # another empty prediction and the invariance check below would
    # pass without exercising anything.
    assert baseline, "baseline prediction is empty; invariance check would be vacuous"

    # Shift every session by +90 days (a full multi-month gap) and
    # re-cluster. Predicted membership must be identical.
    shift = timedelta(days=90)
    for att in corpus.attackers:
        att.first_seen += shift
        att.last_seen += shift
        for s in att.sessions:
            s.started_at += shift

    shifted = _production_clusterer_predict(corpus)

    # Cluster ids may differ as opaque labels but membership groupings
    # must match. Convert each prediction to canonical form: a set of
    # frozensets of co-clustered observation_ids.
    def _canonical(pred: dict[str, str]) -> set[frozenset[str]]:
        """Return the label-independent partition encoded by *pred*."""
        groups: dict[str, set[str]] = {}
        for oid, cid in pred.items():
            groups.setdefault(cid, set()).add(oid)
        return {frozenset(g) for g in groups.values()}

    assert _canonical(baseline) == _canonical(shifted)
|
||||||
|
|
||||||
|
|
||||||
def test_vpn_hopping_passes_at_identity_level_with_production_clusterer():
|
def test_vpn_hopping_passes_at_identity_level_with_production_clusterer():
|
||||||
"""Fixture 2: one rotating actor with stable JA3 + HASSH across
|
"""Fixture 2: one rotating actor with stable JA3 + HASSH across
|
||||||
5 ASNs. The production clusterer must fold all 5 observations into
|
5 ASNs. The production clusterer must fold all 5 observations into
|
||||||
|
|||||||
Reference in New Issue
Block a user