Pre-implementation scaffolding for campaign clustering. The simulator is
the spec — algorithm code follows once fixtures + metrics are stable.
* decnet/clustering/ukc.py — UKCPhase enum (19 phases across In/Through/Out
stages), OBSERVABLE_PHASES set, stage_of() helper. Vocabulary aligns
with future MITRE ATT&CK tagging so synthetic data and runtime phase
inference don't need renaming when TTP-tagging lands.
* tests/factories/campaign_factory.py — YAML DSL parser + deterministic
generator emitting truth-labeled SyntheticAttacker / SyntheticSession
records. Validates phase names, warns on unobservable phases, supports
multi-campaign + noise corpora.
* tests/clustering/metrics.py — pure-Python ARI / homogeneity /
completeness / singleton_recall (no sklearn dep). The metrics are fixed
on purpose, before any algorithm exists.
* tests/fixtures/campaigns/lone_wolf.{yaml,expected.yaml} — fixture 3
from the design doc; simplest of the six, exercises the full pipeline
with an identity-clusterer placeholder.
* development/CAMPAIGN_CLUSTERING.md — design spec for the feature.
* development/DEVELOPMENT_V2.md — note on DSL evolution path
(concurrent phases, multi-actor per phase) deferred post-v1.
77 lines
2.9 KiB
Python
"""Sanity tests for the clustering metric harness."""
|
|
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
from tests.clustering.metrics import (
|
|
adjusted_rand_index,
|
|
completeness,
|
|
homogeneity,
|
|
score,
|
|
singleton_recall,
|
|
)
|
|
|
|
|
|
def test_perfect_agreement_scores_one() -> None:
    """Check that a relabelled copy of the truth partition scores 1.0 everywhere."""
    truth = dict(a="C1", b="C1", c="C2", d="C2")
    # Cluster names are arbitrary: the grouping below matches truth exactly
    # and only the labels differ, so every metric must still report 1.0.
    pred = dict(a="X", b="X", c="Y", d="Y")
    results = score(truth, pred)
    for metric in (
        "adjusted_rand_index",
        "homogeneity",
        "completeness",
        "singleton_recall",
    ):
        assert results[metric] == pytest.approx(1.0)
|
|
|
|
|
|
def test_all_singletons_perfect() -> None:
    """Check that matching all-singleton partitions score 1.0 on recall and ARI."""
    items = "abc"
    # Every item sits alone in both partitions; only the label names differ.
    truth = dict(zip(items, "ABC"))
    pred = dict(zip(items, "123"))
    results = score(truth, pred)
    assert results["singleton_recall"] == pytest.approx(1.0)
    assert results["adjusted_rand_index"] == pytest.approx(1.0)
|
|
|
|
|
|
def test_false_merge_drops_homogeneity() -> None:
    """Check that merging two distinct campaigns zeroes homogeneity only."""
    truth = {"a": "C1", "b": "C2"}
    # Both items land in the same predicted cluster: a false merge.
    merged = dict.fromkeys(truth, "X")
    assert homogeneity(truth, merged) == pytest.approx(0.0)
    # Each true class still sits inside a single cluster, so completeness
    # is unaffected by the merge.
    assert completeness(truth, merged) == pytest.approx(1.0)
|
|
|
|
|
|
def test_false_split_drops_completeness() -> None:
    """Check that splitting one campaign in two zeroes completeness only."""
    truth = dict.fromkeys(("a", "b"), "C1")
    # The single true campaign is spread over two predicted clusters.
    split = dict(a="X", b="Y")
    assert completeness(truth, split) == pytest.approx(0.0)
    assert homogeneity(truth, split) == pytest.approx(1.0)
|
|
|
|
|
|
def test_singleton_recall_penalises_noise_absorption() -> None:
    """Check singleton_recall is 0.0 when wolves are absorbed and 1.0 when kept apart."""
    # Ground truth: three lone wolves plus one real two-member campaign.
    lone_wolves = {"w1": "wolf1", "w2": "wolf2", "w3": "wolf3"}
    campaign = {"c1": "C", "c2": "C"}
    truth = {**lone_wolves, **campaign}

    # Worst case: the clusterer swallows every wolf into one big cluster.
    absorbed = {item: "BIG" for item in truth}
    assert singleton_recall(truth, absorbed) == pytest.approx(0.0)

    # Keeping each wolf on its own must score 1.0 on this metric, whatever
    # the clusterer does with the campaign members.
    kept_apart = {"w1": "1", "w2": "2", "w3": "3", **campaign}
    assert singleton_recall(truth, kept_apart) == pytest.approx(1.0)
|
|
|
|
|
|
def test_mismatched_item_sets_raises() -> None:
    """Check that ARI rejects truth and prediction over different item sets."""
    truth = {"a": "X"}
    pred = {"b": "Y"}  # shares no item key with truth
    with pytest.raises(ValueError):
        adjusted_rand_index(truth, pred)
|
|
|
|
|
|
def test_random_labels_low_ari() -> None:
    """Check that a truth-agnostic partition gets an ARI well below 1."""
    # Chance correction means an arbitrary partition should land near 0
    # against ground truth rather than near 1.
    indices = range(20)
    truth = {f"i{n}": f"C{n // 4}" for n in indices}
    # The prediction ignores truth entirely: items are dealt into 5 buckets
    # by an arithmetic rule unrelated to the truth grouping.
    pred = {f"i{n}": f"X{(n * 7) % 5}" for n in indices}
    # Deliberately loose bound: "much closer to 0 than to 1" is the point.
    assert adjusted_rand_index(truth, pred) < 0.3
|