Files
DECNET/tests/clustering/test_metrics.py
anti 00254629f8 feat(clustering): UKC phase enum + synthetic campaign factory + metric harness
Pre-implementation scaffolding for campaign clustering. The simulator is
the spec — algorithm code follows once fixtures + metrics are stable.

* decnet/clustering/ukc.py — UKCPhase enum (19 phases across In/Through/Out
  stages), OBSERVABLE_PHASES set, stage_of() helper. Vocabulary aligns
  with future MITRE ATT&CK tagging so synthetic data and runtime phase
  inference don't need renaming when TTP-tagging lands.
* tests/factories/campaign_factory.py — YAML DSL parser + deterministic
  generator emitting truth-labeled SyntheticAttacker / SyntheticSession
  records. Validates phase names, warns on unobservable phases, supports
  multi-campaign + noise corpora.
* tests/clustering/metrics.py — pure-Python ARI / homogeneity /
  completeness / singleton_recall (no sklearn dep). Decided before any
  algorithm exists, on purpose.
* tests/fixtures/campaigns/lone_wolf.{yaml,expected.yaml} — fixture 3
  from the design doc; simplest of the six, exercises the full pipeline
  with an identity-clusterer placeholder.
* development/CAMPAIGN_CLUSTERING.md — design spec for the feature.
* development/DEVELOPMENT_V2.md — note on DSL evolution path
  (concurrent phases, multi-actor per phase) deferred post-v1.
2026-04-26 06:29:10 -04:00

77 lines
2.9 KiB
Python

"""Sanity tests for the clustering metric harness."""
from __future__ import annotations
import pytest
from tests.clustering.metrics import (
adjusted_rand_index,
completeness,
homogeneity,
score,
singleton_recall,
)
def test_perfect_agreement_scores_one() -> None:
truth = {"a": "C1", "b": "C1", "c": "C2", "d": "C2"}
# Same partition, different label names — clustering doesn't preserve
# names, so renamed-but-isomorphic must still score 1.0.
pred = {"a": "X", "b": "X", "c": "Y", "d": "Y"}
s = score(truth, pred)
assert s["adjusted_rand_index"] == pytest.approx(1.0)
assert s["homogeneity"] == pytest.approx(1.0)
assert s["completeness"] == pytest.approx(1.0)
assert s["singleton_recall"] == pytest.approx(1.0)
def test_all_singletons_perfect() -> None:
truth = {"a": "A", "b": "B", "c": "C"}
pred = {"a": "1", "b": "2", "c": "3"}
s = score(truth, pred)
assert s["singleton_recall"] == pytest.approx(1.0)
assert s["adjusted_rand_index"] == pytest.approx(1.0)
def test_false_merge_drops_homogeneity() -> None:
truth = {"a": "C1", "b": "C2"}
pred = {"a": "X", "b": "X"} # merged two distinct campaigns
assert homogeneity(truth, pred) == pytest.approx(0.0)
# Completeness is fine (each true class lives in one cluster).
assert completeness(truth, pred) == pytest.approx(1.0)
def test_false_split_drops_completeness() -> None:
truth = {"a": "C1", "b": "C1"}
pred = {"a": "X", "b": "Y"} # split one campaign into two clusters
assert completeness(truth, pred) == pytest.approx(0.0)
assert homogeneity(truth, pred) == pytest.approx(1.0)
def test_singleton_recall_penalises_noise_absorption() -> None:
# 3 lone wolves + 1 real campaign with 2 members.
truth = {"w1": "wolf1", "w2": "wolf2", "w3": "wolf3", "c1": "C", "c2": "C"}
# Clusterer absorbs all wolves into the campaign.
pred = dict.fromkeys(truth, "BIG")
assert singleton_recall(truth, pred) == pytest.approx(0.0)
# And a clusterer that keeps wolves singleton should score 1.0
# on this metric, regardless of what it does with the campaign.
pred_ok = {"w1": "1", "w2": "2", "w3": "3", "c1": "C", "c2": "C"}
assert singleton_recall(truth, pred_ok) == pytest.approx(1.0)
def test_mismatched_item_sets_raises() -> None:
with pytest.raises(ValueError):
adjusted_rand_index({"a": "X"}, {"b": "Y"})
def test_random_labels_low_ari() -> None:
# ARI of an arbitrary partition vs. ground truth should be near 0,
# not near 1 — this is the chance-correction guarantee.
truth = {f"i{n}": f"C{n // 4}" for n in range(20)}
# Pred that ignores truth: just shuffles items into 5 buckets in
# an order uncorrelated with truth.
pred = {f"i{n}": f"X{(n * 7) % 5}" for n in range(20)}
ari = adjusted_rand_index(truth, pred)
# Loose bound — the point is "much closer to 0 than to 1".
assert ari < 0.3