merge: testing → main (reconcile 2-week divergence)
This commit is contained in:
0
tests/clustering/__init__.py
Normal file
0
tests/clustering/__init__.py
Normal file
488
tests/clustering/fixture_harness.py
Normal file
488
tests/clustering/fixture_harness.py
Normal file
@@ -0,0 +1,488 @@
|
||||
"""
|
||||
Shared helpers for fixture-driven clustering tests.
|
||||
|
||||
Each fixture lives at `tests/fixtures/campaigns/<name>.yaml` with a paired
|
||||
`<name>.expected.yaml` bound file. The harness here keeps every per-
|
||||
fixture test file down to "load corpus → predict → assert bounds" without
|
||||
copy-pasting the bound-walk loop or reference clusterers across files.
|
||||
|
||||
Reference clusterers are provided as the algorithm under test in each
|
||||
fixture's bound assertions; their names describe the *signal* they
|
||||
cluster on, not the quality of the result.
|
||||
|
||||
* `identity_clusterer` — every attacker is its own cluster. Trivially
|
||||
passes any fixture whose ground truth is all singletons (lone_wolf,
|
||||
shared_wordlist before merge, etc). Useful as a green baseline while
|
||||
the real connected-components algorithm is under construction.
|
||||
|
||||
* `fingerprint_clusterer` — groups attackers by ``(ja3, hassh)``.
|
||||
Approximates the "stable signals an attacker can't cheaply rotate"
|
||||
arm of the planned similarity graph (see IDENTITY_RESOLUTION.md
|
||||
Premise). Folds rotated-IP observations of one actor into one
|
||||
cluster when the actor's JA3 + HASSH stay stable. Attackers whose
|
||||
fingerprints are both NULL (typical of un-fingerprinted noise
|
||||
scanners) are treated as un-mergeable — each becomes its own
|
||||
singleton — so this clusterer doesn't trivially fuse all noise
|
||||
into one mega-cluster.
|
||||
|
||||
* `credential_jaccard_clusterer` — deliberately-bad reference that
|
||||
merges any two attackers whose credential-attempt sets overlap above
|
||||
a threshold. Exists so fixtures like `shared_wordlist` can prove
|
||||
they fail a clusterer that relies on credential overlap alone — the
|
||||
whole point of fixture #1.
|
||||
|
||||
* `asn_clusterer` — deliberately-bad reference that groups attackers
|
||||
by source ASN. Exists so fixtures like `vpn_hopping` (fixture #2)
|
||||
can prove they fail a clusterer that treats ASN match as a
|
||||
high-weight signal — VPN/proxy hopping shatters ASN within a single
|
||||
identity and a clusterer that leans on it tanks completeness.
|
||||
|
||||
* `time_window_clusterer` — deliberately-bad reference that unions
|
||||
attackers whose session time-ranges are within ``gap_days`` of each
|
||||
other. Exists so fixtures like `paused_campaign` (fixture #4) can
|
||||
prove they fail a clusterer that treats short-window time proximity
|
||||
as a primary signal — operators pause, sleep, take weekends.
|
||||
|
||||
* `c2_callback_clusterer` — union-find on overlapping C2 callback
|
||||
sets. Pass-clusterer for fixture 5 (multi_operator), where two
|
||||
operators with distinct tooling share a C2 endpoint as the
|
||||
load-bearing campaign signal. Attackers with no C2 endpoints
|
||||
become their own singleton.
|
||||
|
||||
* `shift_clusterer` — deliberately-bad reference that buckets
|
||||
attackers by majority session-start hour into night/day/swing.
|
||||
Exists so fixture 5 can prove it fails a clusterer that treats
|
||||
shift schedule as a primary signal — operators on different
|
||||
schedules can still share a campaign.
|
||||
|
||||
* `composite_signals_clusterer` — union-find that combines
|
||||
``(ja3, hassh)`` match OR shared C2 callback into the same
|
||||
cluster. Approximates the planned similarity graph well enough
|
||||
to score the combined-corpus fixture (fixture 6, noise_floor).
|
||||
|
||||
* `recency_decay_clusterer` — deliberately-bad reference that
|
||||
starts from the same composite signal graph but weights each
|
||||
edge by ``exp(-time_distance / half_life_days)`` and drops
|
||||
edges below a threshold. Adversarial reference for fixture 7
|
||||
(slow_burn): the canonical production failure mode where a
|
||||
graph clusterer with recency decay fragments long-running
|
||||
APT campaigns by silently expiring multi-week-old edges.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Callable
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
from tests.clustering.metrics import score
|
||||
from tests.factories.campaign_factory import GeneratedCorpus
|
||||
|
||||
# Signature shared by every clusterer handed to the harness: takes the
# generated corpus and returns a ``{attacker_id: cluster_label}`` mapping.
PredictFn = Callable[[GeneratedCorpus], dict[str, str]]
|
||||
|
||||
|
||||
def assert_fixture_bounds(
    corpus: GeneratedCorpus,
    predict: PredictFn,
    expected_path: str | Path,
    *,
    truth_level: str = "campaign",
) -> dict[str, float]:
    """
    Run `predict` against the corpus, score against ground truth, and
    require every metric to meet the floor declared in `expected_path`.

    ``expected_path`` is a YAML mapping of ``metric_name -> {"min": floor}``.

    ``truth_level`` selects the oracle: ``"campaign"`` (default) for
    campaign-clustering fixtures, ``"identity"`` for identity-resolution
    fixtures (where the clusterer's job is to fold N rotated-IP
    observations into one identity), or ``"actor"`` for completeness.

    Returns the observed metrics dict so callers can do additional
    assertions (e.g. "homogeneity is *exactly* 1.0 for this fixture").

    Raises:
        ValueError: the bounds file does not contain a mapping (an empty
            file parses to ``None``).
        KeyError: the bounds file names a metric that `score` does not
            produce, or a bound entry has no ``"min"`` key.
        AssertionError: at least one observed metric is below its floor.
    """
    bounds = yaml.safe_load(Path(expected_path).read_text(encoding="utf-8"))
    if not isinstance(bounds, dict):
        # An empty or malformed bounds file would otherwise die on
        # ``.items()`` with an AttributeError that hides the real cause.
        raise ValueError(
            f"bounds file {expected_path} must contain a mapping of "
            f"metric name -> {{'min': floor}}, got {type(bounds).__name__}"
        )

    truth = corpus.truth_labels(level=truth_level)
    pred = predict(corpus)
    metrics = score(truth, pred)

    unknown = [name for name in bounds if name not in metrics]
    if unknown:
        # Same exception type as the plain dict lookup would raise, but
        # with enough context to fix the fixture file.
        raise KeyError(
            f"bounds file {expected_path} names unknown metric(s) {unknown}; "
            f"known metrics: {sorted(metrics)}"
        )

    failures = []
    for name, bound in bounds.items():
        observed = metrics[name]
        floor = bound["min"]
        if observed < floor:
            failures.append(f"{name}={observed:.3f} < min {floor:.3f}")

    if failures:
        # Raised explicitly (not via ``assert``) so the check still runs
        # when Python is started with -O.
        raise AssertionError(
            "fixture bounds violated: " + "; ".join(failures)
            + f" (full metrics: {metrics})"
        )
    return metrics
|
||||
|
||||
|
||||
# ─── Reference clusterers ───────────────────────────────────────────────────
|
||||
|
||||
|
||||
def identity_clusterer(corpus: GeneratedCorpus) -> dict[str, str]:
    """Give every attacker row a private ``cluster-<attacker_id>`` label.

    Baseline reference: no two attackers ever share a cluster.
    """
    labels: dict[str, str] = {}
    for attacker in corpus.attackers:
        labels[attacker.attacker_id] = f"cluster-{attacker.attacker_id}"
    return labels
|
||||
|
||||
|
||||
def fingerprint_clusterer(corpus: GeneratedCorpus) -> dict[str, str]:
    """Cluster attackers on their ``(ja3, hassh)`` pair.

    Rows where both fingerprints are ``None`` have nothing to match on
    and each get a private ``fp-singleton-<attacker_id>`` label, so the
    un-fingerprinted rows are never fused together. Every other row is
    labelled ``fp::<ja3>::<hassh>``; rows with equal pairs therefore
    share a cluster.
    """

    def label_for(att) -> str:
        has_fingerprint = att.ja3 is not None or att.hassh is not None
        if has_fingerprint:
            return f"fp::{att.ja3}::{att.hassh}"
        # No fingerprint to share: un-mergeable, own cluster.
        return f"fp-singleton-{att.attacker_id}"

    return {att.attacker_id: label_for(att) for att in corpus.attackers}
|
||||
|
||||
|
||||
def asn_clusterer(corpus: GeneratedCorpus) -> dict[str, str]:
    """Cluster attackers purely on source ASN (label ``asn-<asn>``).

    Deliberately-bad reference: see fixture 2.
    """
    labels: dict[str, str] = {}
    for attacker in corpus.attackers:
        labels[attacker.attacker_id] = f"asn-{attacker.asn}"
    return labels
|
||||
|
||||
|
||||
def _union_find(ids: list[str]) -> tuple[
|
||||
dict[str, str], Callable[[str], str], Callable[[str, str], None]
|
||||
]:
|
||||
"""Return (parent, find, union) for a fresh union-find over ``ids``."""
|
||||
parent: dict[str, str] = {aid: aid for aid in ids}
|
||||
|
||||
def find(x: str) -> str:
|
||||
while parent[x] != x:
|
||||
parent[x] = parent[parent[x]]
|
||||
x = parent[x]
|
||||
return x
|
||||
|
||||
def union(x: str, y: str) -> None:
|
||||
rx, ry = find(x), find(y)
|
||||
if rx != ry:
|
||||
parent[rx] = ry
|
||||
|
||||
return parent, find, union
|
||||
|
||||
|
||||
def c2_callback_clusterer(corpus: GeneratedCorpus) -> dict[str, str]:
    """Merge attackers that share at least one C2 callback endpoint.

    Each attacker's endpoint set is the truthy ``c2_callback`` values
    across its sessions. Any two attackers with intersecting sets are
    unioned, transitively. An attacker with no endpoints is labelled
    ``c2-none-<attacker_id>`` and never merged; the rest are labelled
    ``c2-<root attacker id>``.
    """
    endpoints: dict[str, set[str]] = {
        att.attacker_id: {
            sess.c2_callback for sess in att.sessions if sess.c2_callback
        }
        for att in corpus.attackers
    }

    ids = list(endpoints)
    _parent, find, union = _union_find(ids)

    # Only attackers that actually called back can form an edge.
    with_c2 = [aid for aid in ids if endpoints[aid]]
    for pos, left in enumerate(with_c2):
        left_set = endpoints[left]
        for right in with_c2[pos + 1 :]:
            if not left_set.isdisjoint(endpoints[right]):
                union(left, right)

    return {
        aid: (f"c2-{find(aid)}" if endpoints[aid] else f"c2-none-{aid}")
        for aid in ids
    }
|
||||
|
||||
|
||||
def shift_clusterer(corpus: GeneratedCorpus) -> dict[str, str]:
    """Label each attacker with the shift most of its sessions start in.

    Deliberately-bad reference: see fixture 5.

    Shifts, keyed on ``session.started_at.hour``:
      * night: hours 22-23 and 0-5
      * day: hours 6-13
      * swing: every other hour (14-21)

    On a tie the shift seen first in session order wins. Attackers with
    no sessions get a private ``shift-none-<attacker_id>`` label.
    """
    shift_of_hour: dict[int, str] = {h: "night" for h in (22, 23, 0, 1, 2, 3, 4, 5)}
    shift_of_hour.update({h: "day" for h in (6, 7, 8, 9, 10, 11, 12, 13)})

    labels: dict[str, str] = {}
    for att in corpus.attackers:
        if att.sessions:
            tally: dict[str, int] = {}
            for sess in att.sessions:
                # Any hour not listed above falls into the swing shift.
                shift = shift_of_hour.get(sess.started_at.hour, "swing")
                tally[shift] = tally.get(shift, 0) + 1
            winner = max(tally, key=tally.get)
            labels[att.attacker_id] = f"shift-{winner}"
        else:
            labels[att.attacker_id] = f"shift-none-{att.attacker_id}"
    return labels
|
||||
|
||||
|
||||
def composite_signals_clusterer(corpus: GeneratedCorpus) -> dict[str, str]:
    """Connected components over two edge types: equal ``(ja3, hassh)``
    pairs, or intersecting C2 callback sets.

    Used as the pass-clusterer for fixture 6, where multiple campaigns
    and noise are scored together.

    An attacker whose ja3 and hassh are both ``None`` has no fingerprint
    edge; one with no truthy ``c2_callback`` has no C2 edge. Attackers
    with neither signal are labelled ``composite-singleton-<attacker_id>``;
    all others are labelled ``composite-<root attacker id>``.
    """
    c2_sets: dict[str, set[str]] = {}
    fp_of: dict[str, tuple[str | None, str | None] | None] = {}
    for att in corpus.attackers:
        aid = att.attacker_id
        c2_sets[aid] = {sess.c2_callback for sess in att.sessions if sess.c2_callback}
        unfingerprinted = att.ja3 is None and att.hassh is None
        fp_of[aid] = None if unfingerprinted else (att.ja3, att.hassh)

    ids = list(c2_sets)
    _parent, find, union = _union_find(ids)

    # Fingerprint edges: link every member of a fingerprint group to the
    # first member seen.
    members_by_fp: dict[tuple[str | None, str | None], list[str]] = {}
    for aid, fp in fp_of.items():
        if fp is not None:
            members_by_fp.setdefault(fp, []).append(aid)
    for members in members_by_fp.values():
        anchor, *rest = members
        for other in rest:
            union(anchor, other)

    # C2 overlap edges: pairwise over attackers that have any callback.
    with_c2 = [aid for aid in ids if c2_sets[aid]]
    for pos, left in enumerate(with_c2):
        left_set = c2_sets[left]
        for right in with_c2[pos + 1 :]:
            if not left_set.isdisjoint(c2_sets[right]):
                union(left, right)

    labels: dict[str, str] = {}
    for aid in ids:
        has_signal = fp_of[aid] is not None or bool(c2_sets[aid])
        if has_signal:
            labels[aid] = f"composite-{find(aid)}"
        else:
            labels[aid] = f"composite-singleton-{aid}"
    return labels
|
||||
|
||||
|
||||
def recency_decay_clusterer(
    corpus: GeneratedCorpus,
    *,
    half_life_days: float = 14.0,
    threshold: float = 0.5,
) -> dict[str, str]:
    """Composite-signal clustering whose edges fade with time distance.

    Candidate edges are the same as in ``composite_signals_clusterer``
    (equal ``(ja3, hassh)`` pair OR intersecting C2 callback sets). Each
    candidate is weighted ``exp(-gap_days / half_life_days)``, where
    ``gap_days`` is the distance between the two attackers' activity
    midpoints (halfway between earliest session start and latest session
    end). Only edges with weight >= ``threshold`` are unioned.

    Note: with this formula the weight at ``gap_days == half_life_days``
    is 1/e (about 0.37), not 0.5. At the defaults an edge survives only
    while the gap is at most ``14 * ln 2``, about 9.7 days.

    Deliberately-bad reference for fixture 7 (slow_burn): a campaign that
    runs for months is fragmented once its older edges decay away.

    Attackers without sessions have no midpoint and never gain an edge.
    Attackers with neither fingerprint nor C2 callback are labelled
    ``recency-singleton-<attacker_id>``; the rest ``recency-<root id>``.
    """
    import math
    from datetime import timedelta

    c2_sets: dict[str, set[str]] = {}
    fp_of: dict[str, tuple[str | None, str | None] | None] = {}
    midpoint: dict[str, "object | None"] = {}
    for att in corpus.attackers:
        aid = att.attacker_id
        c2_sets[aid] = {sess.c2_callback for sess in att.sessions if sess.c2_callback}
        unfingerprinted = att.ja3 is None and att.hassh is None
        fp_of[aid] = None if unfingerprinted else (att.ja3, att.hassh)
        if not att.sessions:
            midpoint[aid] = None
            continue
        starts = [sess.started_at for sess in att.sessions]
        ends = [
            sess.started_at + timedelta(seconds=sess.duration_s)
            for sess in att.sessions
        ]
        first_start = min(starts)
        midpoint[aid] = first_start + (max(ends) - first_start) / 2

    ids = list(c2_sets)
    _parent, find, union = _union_find(ids)

    def has_signal_edge(a: str, b: str) -> bool:
        """True when a and b share a fingerprint pair or a C2 endpoint."""
        fp_a = fp_of[a]
        if fp_a is not None and fp_a == fp_of[b]:
            return True
        return not c2_sets[a].isdisjoint(c2_sets[b])

    # Only attackers with a midpoint can be placed on the time axis.
    timed = [aid for aid in ids if midpoint[aid] is not None]
    for pos, a in enumerate(timed):
        mid_a = midpoint[a]
        for b in timed[pos + 1 :]:
            if not has_signal_edge(a, b):
                continue
            gap_days = abs((mid_a - midpoint[b]).total_seconds()) / 86400.0
            # Base strength of a matched edge is 1.0, so the decayed
            # weight is just the exponential factor.
            if math.exp(-gap_days / half_life_days) >= threshold:
                union(a, b)

    labels: dict[str, str] = {}
    for aid in ids:
        if fp_of[aid] is None and not c2_sets[aid]:
            labels[aid] = f"recency-singleton-{aid}"
        else:
            labels[aid] = f"recency-{find(aid)}"
    return labels
|
||||
|
||||
|
||||
def time_window_clusterer(
    corpus: GeneratedCorpus, *, gap_days: float = 1.0
) -> dict[str, str]:
    """Union attackers whose session time-ranges overlap or lie within
    ``gap_days`` of each other.

    Deliberately-bad reference for fixture 4 (paused_campaign): a
    campaign that goes silent for several days is split into "before
    pause" and "after pause" clusters, breaching completeness. The real
    algorithm must not lean on short-window time proximity as a primary
    signal; time bursts are a weak hint, not a hard partition.

    An attacker's range runs from its earliest session start to its
    latest session end (``started_at + duration_s`` seconds). Attackers
    with no sessions have no range and stay in their own cluster.

    Returns ``{attacker_id: root attacker id of its component}``.
    """
    from datetime import timedelta

    gap = timedelta(days=gap_days)
    ids = [a.attacker_id for a in corpus.attackers]

    ranges: dict[str, tuple] = {}
    for att in corpus.attackers:
        if not att.sessions:
            continue
        starts = [s.started_at for s in att.sessions]
        ends = [s.started_at + timedelta(seconds=s.duration_s) for s in att.sessions]
        ranges[att.attacker_id] = (min(starts), max(ends))

    # Shared module helper instead of a private copy of union-find.
    _parent, find, union = _union_find(ids)

    keys = list(ranges)
    for i, a in enumerate(keys):
        a_start, a_end = ranges[a]
        for b in keys[i + 1 :]:
            b_start, b_end = ranges[b]
            # Time-distance between the two ranges (0 if they overlap).
            if a_end < b_start:
                separation = b_start - a_end
            elif b_end < a_start:
                separation = a_start - b_end
            else:
                separation = timedelta(0)
            if separation <= gap:
                union(a, b)

    return {aid: find(aid) for aid in ids}
|
||||
|
||||
|
||||
def credential_jaccard_clusterer(
    corpus: GeneratedCorpus, *, threshold: float = 0.5
) -> dict[str, str]:
    """
    Deliberately-bad reference: union-find over attackers, with an edge
    whenever two attackers' credential-attempt sets have Jaccard
    similarity >= ``threshold``.

    Used to demonstrate that fixtures targeting credential-overlap
    failure modes (fixture 1: shared_wordlist) actually catch a clusterer
    that leans on credential signals alone. NOT the real algorithm.

    An attacker's set is the union of ``credentials_tried`` over all its
    sessions. Attackers with an empty set never gain an edge.

    Returns ``{attacker_id: root attacker id of its component}``.
    """
    # Build per-attacker credential sets.
    creds: dict[str, set[tuple[str, str]]] = {}
    for att in corpus.attackers:
        attempted: set[tuple[str, str]] = set()
        for sess in att.sessions:
            attempted.update(sess.credentials_tried)
        creds[att.attacker_id] = attempted

    ids = list(creds)
    # Shared module helper instead of a private copy of union-find.
    _parent, find, union = _union_find(ids)

    for i, a in enumerate(ids):
        sa = creds[a]
        if not sa:
            continue
        for b in ids[i + 1 :]:
            sb = creds[b]
            if not sb:
                continue
            # Both sets are non-empty here, so the union size is >= 1
            # and the division is always defined.
            jaccard = len(sa & sb) / len(sa | sb)
            if jaccard >= threshold:
                union(a, b)

    return {aid: find(aid) for aid in ids}
|
||||
179
tests/clustering/metrics.py
Normal file
179
tests/clustering/metrics.py
Normal file
@@ -0,0 +1,179 @@
|
||||
"""
|
||||
Clustering metric harness — see development/CAMPAIGN_CLUSTERING.md §3.
|
||||
|
||||
Decided BEFORE any clustering algorithm exists, on purpose: if the
|
||||
metrics get picked after seeing results, they'll flatter whatever the
|
||||
algorithm happens to produce.
|
||||
|
||||
Four metrics, none on its own sufficient:
|
||||
|
||||
* Adjusted Rand Index — headline number, chance-corrected agreement
|
||||
between predicted clusters and ground truth.
|
||||
* Homogeneity — each predicted cluster contains only one true class.
|
||||
Catches FALSE MERGES (campaigns wrongly fused).
|
||||
* Completeness — every member of a true class lands in the same
|
||||
predicted cluster. Catches FALSE SPLITS (one campaign wrongly torn
|
||||
apart).
|
||||
* Singleton recall — fraction of ground-truth singletons (lone wolves,
|
||||
background noise) that are kept singleton by the clusterer.
|
||||
|
||||
Implemented from first principles in pure Python so the test harness
|
||||
doesn't pull sklearn/numpy into the runtime dependency surface.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from collections import Counter, defaultdict
|
||||
|
||||
|
||||
def _comb2(n: int) -> int:
|
||||
"""C(n, 2) — number of unordered pairs from n items."""
|
||||
return n * (n - 1) // 2 if n >= 2 else 0
|
||||
|
||||
|
||||
def adjusted_rand_index(truth: dict[str, str], pred: dict[str, str]) -> float:
    """
    Chance-corrected pair agreement between two labelings of one item set.

    Both arguments map item_id -> cluster_id and must cover exactly the
    same items. 1.0 means the partitions are identical up to label
    renaming, values near 0.0 mean chance-level agreement, and the score
    can go negative for worse-than-chance labelings.

    Raises:
        ValueError: ``truth`` and ``pred`` do not have the same keys.
    """
    truth_items, pred_items = set(truth), set(pred)
    if truth_items != pred_items:
        raise ValueError(
            "ARI requires identical item sets in truth and pred "
            f"(missing in pred: {truth_items - pred_items}, "
            f"missing in truth: {pred_items - truth_items})"
        )

    n = len(truth)
    if n < 2:
        # No pairs to disagree on: trivially "agree" on <2 items.
        return 1.0

    # Contingency table: cell (cluster_i, class_j) = |cluster_i ∩ class_j|.
    cells = Counter((pred[item], t_label) for item, t_label in truth.items())
    cluster_sizes = Counter(pred.values())  # row sums (predicted clusters)
    class_sizes = Counter(truth.values())  # column sums (true classes)

    pairs_together_in_both = sum(_comb2(size) for size in cells.values())
    pairs_in_clusters = sum(_comb2(size) for size in cluster_sizes.values())
    pairs_in_classes = sum(_comb2(size) for size in class_sizes.values())

    # n >= 2 here, so the total pair count is at least 1.
    expected = (pairs_in_clusters * pairs_in_classes) / _comb2(n)
    max_index = (pairs_in_clusters + pairs_in_classes) / 2
    if max_index == expected:
        # Degenerate case: this equality only holds when both labelings
        # are all-singletons or both are one big cluster, i.e. the two
        # partitions are identical, so the score is 1.0.
        return 1.0
    return (pairs_together_in_both - expected) / (max_index - expected)
|
||||
|
||||
|
||||
def _entropy(counts: list[int], total: int) -> float:
|
||||
if total == 0:
|
||||
return 0.0
|
||||
h = 0.0
|
||||
for c in counts:
|
||||
if c == 0:
|
||||
continue
|
||||
p = c / total
|
||||
h -= p * math.log(p)
|
||||
return h
|
||||
|
||||
|
||||
def _conditional_entropy(
|
||||
contingency: dict[tuple[str, str], int],
|
||||
given_counts: dict[str, int],
|
||||
total: int,
|
||||
) -> float:
|
||||
"""H(rows | cols) — i.e. entropy of class within each cluster."""
|
||||
if total == 0:
|
||||
return 0.0
|
||||
h = 0.0
|
||||
by_col: dict[str, list[int]] = defaultdict(list)
|
||||
for (row, col), v in contingency.items():
|
||||
by_col[col].append(v)
|
||||
for col, vs in by_col.items():
|
||||
col_total = given_counts[col]
|
||||
if col_total == 0:
|
||||
continue
|
||||
col_entropy = _entropy(vs, col_total)
|
||||
h += (col_total / total) * col_entropy
|
||||
return h
|
||||
|
||||
|
||||
def homogeneity(truth: dict[str, str], pred: dict[str, str]) -> float:
    """
    1 - H(truth | pred) / H(truth). 1.0 = each predicted cluster
    contains only members of a single true class (no false merges).

    Both args map item_id -> cluster_id and must cover exactly the same
    items. Returns 1.0 for an empty item set and when the truth has zero
    entropy (a single true class).

    Raises:
        ValueError: ``truth`` and ``pred`` do not have the same keys.
    """
    if set(truth) != set(pred):
        # Same contract as adjusted_rand_index. Without this check, extra
        # items in ``pred`` inflate the cluster sizes used as conditional
        # weights and silently produce a wrong score.
        raise ValueError(
            "homogeneity requires identical item sets in truth and pred "
            f"(missing in pred: {set(truth) - set(pred)}, "
            f"missing in truth: {set(pred) - set(truth)})"
        )
    n = len(truth)
    if n == 0:
        return 1.0

    # Rows are true classes, columns are predicted clusters.
    contingency: dict[tuple[str, str], int] = defaultdict(int)
    for item, t in truth.items():
        contingency[(t, pred[item])] += 1
    truth_counts = Counter(truth.values())
    pred_counts = Counter(pred.values())

    h_truth = _entropy(list(truth_counts.values()), n)
    if h_truth == 0:
        # Only one true class: nothing can be falsely merged.
        return 1.0
    h_truth_given_pred = _conditional_entropy(contingency, dict(pred_counts), n)
    return 1.0 - (h_truth_given_pred / h_truth)
|
||||
|
||||
|
||||
def completeness(truth: dict[str, str], pred: dict[str, str]) -> float:
    """
    1 - H(pred | truth) / H(pred). 1.0 = all members of each true class
    are assigned to the same predicted cluster (no false splits).

    Both args map item_id -> cluster_id and must cover exactly the same
    items. Returns 1.0 for an empty item set and when the prediction has
    zero entropy (a single predicted cluster).

    Raises:
        ValueError: ``truth`` and ``pred`` do not have the same keys.
    """
    if set(truth) != set(pred):
        # Same contract as adjusted_rand_index. Without this check, extra
        # items in ``pred`` skew H(pred) and silently produce a wrong
        # score.
        raise ValueError(
            "completeness requires identical item sets in truth and pred "
            f"(missing in pred: {set(truth) - set(pred)}, "
            f"missing in truth: {set(pred) - set(truth)})"
        )
    n = len(truth)
    if n == 0:
        return 1.0

    # Rows are predicted clusters, columns are true classes.
    contingency: dict[tuple[str, str], int] = defaultdict(int)
    for item, t in truth.items():
        contingency[(pred[item], t)] += 1
    pred_counts = Counter(pred.values())
    truth_counts = Counter(truth.values())

    h_pred = _entropy(list(pred_counts.values()), n)
    if h_pred == 0:
        # Everything is in one predicted cluster: nothing is split.
        return 1.0
    h_pred_given_truth = _conditional_entropy(contingency, dict(truth_counts), n)
    return 1.0 - (h_pred_given_truth / h_pred)
|
||||
|
||||
|
||||
def singleton_recall(truth: dict[str, str], pred: dict[str, str]) -> float:
    """
    Fraction of ground-truth singletons that the clusterer kept singleton.

    A "true singleton" is an item whose truth label has exactly one
    member (lone wolves, background noise scanners). It counts as kept
    when its predicted cluster also has exactly one member. The metric
    exists because ARI/homogeneity/completeness all dilute the cost of a
    clusterer that absorbs noise into real campaigns.

    Both args map item_id -> cluster_id and must cover exactly the same
    items. Returns 1.0 when the truth contains no singletons.

    Raises:
        ValueError: ``truth`` and ``pred`` do not have the same keys.
    """
    if set(truth) != set(pred):
        # Same contract as adjusted_rand_index. An item that exists only
        # in ``pred`` would otherwise inflate a predicted cluster's size
        # and make a correctly-kept singleton look merged.
        raise ValueError(
            "singleton_recall requires identical item sets in truth and pred "
            f"(missing in pred: {set(truth) - set(pred)}, "
            f"missing in truth: {set(pred) - set(truth)})"
        )
    truth_sizes = Counter(truth.values())
    true_singletons = [
        item for item, label in truth.items() if truth_sizes[label] == 1
    ]
    if not true_singletons:
        return 1.0
    pred_sizes = Counter(pred.values())
    kept = sum(1 for item in true_singletons if pred_sizes[pred[item]] == 1)
    return kept / len(true_singletons)
|
||||
|
||||
|
||||
def score(truth: dict[str, str], pred: dict[str, str]) -> dict[str, float]:
    """Compute all four clustering metrics in one call, keyed by metric name."""
    # ARI runs first: it validates that truth and pred cover the same items.
    ari = adjusted_rand_index(truth, pred)
    homog = homogeneity(truth, pred)
    compl = completeness(truth, pred)
    recall = singleton_recall(truth, pred)
    return {
        "adjusted_rand_index": ari,
        "homogeneity": homog,
        "completeness": compl,
        "singleton_recall": recall,
    }
|
||||
318
tests/clustering/test_campaign_factory.py
Normal file
318
tests/clustering/test_campaign_factory.py
Normal file
@@ -0,0 +1,318 @@
|
||||
"""Determinism + DSL-validation tests for the synthetic campaign factory."""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.clustering.ukc import UKCPhase
|
||||
from tests.factories.campaign_factory import (
|
||||
DSLValidationError,
|
||||
generate,
|
||||
)
|
||||
|
||||
|
||||
def _minimal_spec() -> dict:
|
||||
return {
|
||||
"campaign": {
|
||||
"id": "c-test",
|
||||
"actors": [{"id": "a-1", "asn": 64512}],
|
||||
"phases": [{"name": "delivery", "actor": "a-1"}],
|
||||
"duration_days": 1,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def test_generation_is_deterministic_given_seed() -> None:
    """Generating twice from one spec and one seed yields identical IDs."""
    spec = _minimal_spec()
    first = generate(spec, seed=42)
    second = generate(spec, seed=42)

    # IDs are RNG-driven: the same seed must reproduce the exact IDs, not
    # just the same structure. Otherwise federation gossip and fixture
    # diffing both break.
    first_attacker_ids = [att.attacker_id for att in first.attackers]
    second_attacker_ids = [att.attacker_id for att in second.attackers]
    assert first_attacker_ids == second_attacker_ids

    first_session_ids = [s.session_id for s in first.sessions]
    second_session_ids = [s.session_id for s in second.sessions]
    assert first_session_ids == second_session_ids
|
||||
|
||||
|
||||
def test_different_seeds_produce_different_ids() -> None:
    """Changing the seed changes the first attacker's generated ID."""
    spec = _minimal_spec()
    from_seed_1 = generate(spec, seed=1)
    from_seed_2 = generate(spec, seed=2)
    id_1 = from_seed_1.attackers[0].attacker_id
    id_2 = from_seed_2.attackers[0].attacker_id
    assert id_1 != id_2
|
||||
|
||||
|
||||
def test_truth_labels_match_dsl() -> None:
    """Truth campaign/actor IDs on the generated row mirror the spec."""
    corpus = generate(_minimal_spec(), seed=0)
    assert corpus.attackers[0].truth_campaign_id == "c-test"
    assert corpus.attackers[0].truth_actor_id == "a-1"

    # truth_labels() returns the dict the metric harness consumes.
    truth = corpus.truth_labels()
    first_id = corpus.attackers[0].attacker_id
    assert truth[first_id] == "c-test"
|
||||
|
||||
|
||||
def test_unobservable_phase_emits_no_events() -> None:
    """A pre-target reconnaissance phase must not produce any session."""
    recon = {"name": "reconnaissance", "actor": "a-1"}  # pre-target, unobservable
    delivery = {"name": "delivery", "actor": "a-1"}

    spec = _minimal_spec()
    spec["campaign"]["phases"] = [recon, delivery]
    corpus = generate(spec, seed=0)

    # Only the delivery phase should produce sessions.
    phases_seen = [s.phase == UKCPhase.DELIVERY for s in corpus.sessions]
    assert all(phases_seen)
    assert len(corpus.sessions) == 1
|
||||
|
||||
|
||||
def test_unknown_phase_name_raises() -> None:
    """A phase name outside the UKC vocabulary is rejected by validation."""
    bad_phase = {"name": "make_coffee", "actor": "a-1"}
    spec = _minimal_spec()
    spec["campaign"]["phases"] = [bad_phase]
    with pytest.raises(DSLValidationError, match="unknown UKC phase"):
        generate(spec, seed=0)
|
||||
|
||||
|
||||
def test_phase_referencing_unknown_actor_raises() -> None:
    """A phase that points at an undeclared actor is rejected by validation."""
    orphan_phase = {"name": "delivery", "actor": "ghost"}
    spec = _minimal_spec()
    spec["campaign"]["phases"] = [orphan_phase]
    with pytest.raises(DSLValidationError, match="unknown actor"):
        generate(spec, seed=0)
|
||||
|
||||
|
||||
def test_noise_scanners_are_truth_singletons() -> None:
    """Each noise scanner gets its own truth campaign, separate from real ones."""
    corpus_section = {
        "campaigns": [_minimal_spec()],
        "noise": {"scanner_count": 5},
    }
    corpus = generate({"corpus": corpus_section}, seed=0)

    # 1 campaign actor + 5 noise scanners = 6 distinct truth campaigns.
    distinct_campaigns = {row.truth_campaign_id for row in corpus.attackers}
    assert len(distinct_campaigns) == 6
|
||||
|
||||
|
||||
def test_multi_actor_campaign_shares_campaign_id() -> None:
    """Two actors declared in one campaign carry the same truth campaign ID."""
    actors = [
        {"id": "a-1", "asn": 14061},
        {"id": "a-2", "asn": 14061},
    ]
    phases = [
        {"name": "delivery", "actor": "a-1"},
        {"name": "discovery", "actor": "a-2"},
    ]
    spec = {
        "campaign": {
            "id": "c-shared",
            "actors": actors,
            "phases": phases,
            "duration_days": 1,
        }
    }
    corpus = generate(spec, seed=0)
    labels = corpus.truth_labels()

    # Both attacker rows must point to the SAME truth_campaign_id.
    # This is the property fixture 5 (multi_operator) hinges on.
    assert set(labels.values()) == {"c-shared"}
|
||||
|
||||
|
||||
# ─── ip_pool: rotating — identity-resolution fixture support ────────────────
|
||||
|
||||
|
||||
def test_rotating_ip_pool_emits_one_row_per_rotation_count() -> None:
    """
    A rotating actor with ``rotation_count: 5`` expands into 5
    SyntheticAttacker rows for that single DSL actor.
    """
    rotating_actor = {
        "id": "a-1",
        "asn": 14061,
        "ip_pool": "rotating",
        "rotation_count": 5,
        "ja3": "JA3-fixed",
        "hassh": "HASSH-fixed",
    }
    delivery = {
        "name": "delivery",
        "actor": "a-1",
        "target_selector": {"count": 10},
    }
    spec = {
        "campaign": {
            "id": "c-rotating",
            "actors": [rotating_actor],
            "phases": [delivery],
            "duration_days": 1,
        }
    }
    corpus = generate(spec, seed=0)
    assert len(corpus.attackers) == 5
|
||||
|
||||
|
||||
def test_rotating_rows_share_identity_and_fingerprints_but_differ_on_ip() -> None:
    """
    Every rotated row of one actor MUST agree on truth_identity_id,
    truth_actor_id, truth_campaign_id, ja3 and hassh (the stable signals
    a clusterer uses to recover identity) and MUST differ on ip, which
    is what makes the scenario interesting.
    """
    rotating_actor = {
        "id": "a-1",
        "asn": 14061,
        "ip_pool": "rotating",
        "rotation_count": 5,
        "ja3": "JA3-fixed",
        "hassh": "HASSH-fixed",
    }
    delivery = {
        "name": "delivery",
        "actor": "a-1",
        "target_selector": {"count": 5},
    }
    spec = {
        "campaign": {
            "id": "c-vpn-hop",
            "actors": [rotating_actor],
            "phases": [delivery],
            "duration_days": 1,
        }
    }
    observed = generate(spec, seed=0).attackers

    # Stable: shared across all 5 rows.
    assert len({row.truth_identity_id for row in observed}) == 1
    assert len({row.truth_actor_id for row in observed}) == 1
    assert len({row.truth_campaign_id for row in observed}) == 1
    assert len({row.ja3 for row in observed}) == 1
    assert len({row.hassh for row in observed}) == 1
    # Rotating: 5 distinct IPs.
    assert len({row.ip for row in observed}) == 5
|
||||
|
||||
|
||||
def test_rotation_asns_distributed_across_rows() -> None:
    """Each rotated row takes the ASN at its position in ``rotation_asns``.

    Five rotations and five listed ASNs: the attacker rows are expected
    to carry the ASNs in the same order as the list.
    """
    asn_pool = [14061, 7922, 16509, 14618, 13335]
    rotating_actor = {
        "id": "a-1",
        "asn": 14061,  # primary, ignored when rotation_asns is set
        "ip_pool": "rotating",
        "rotation_count": 5,
        "rotation_asns": [14061, 7922, 16509, 14618, 13335],
        "ja3": "x",
        "hassh": "y",
    }
    delivery_phase = {
        "name": "delivery",
        "actor": "a-1",
        "target_selector": {"count": 5},
    }
    spec = {
        "campaign": {
            "id": "c-multi-asn",
            "actors": [rotating_actor],
            "phases": [delivery_phase],
            "duration_days": 1,
        }
    }
    corpus = generate(spec, seed=0)
    observed = [row.asn for row in corpus.attackers]
    assert observed == asn_pool
|
||||
|
||||
|
||||
def test_rotation_asns_cycle_when_shorter_than_count() -> None:
    """A two-entry ``rotation_asns`` wraps around to cover five rotations."""
    rotating_actor = {
        "id": "a-1",
        "ip_pool": "rotating",
        "rotation_count": 5,
        "rotation_asns": [100, 200],
        "ja3": "x",
        "hassh": "y",
    }
    spec = {
        "campaign": {
            "id": "c-cycle",
            "actors": [rotating_actor],
            "phases": [{"name": "delivery", "actor": "a-1"}],
            "duration_days": 1,
        }
    }
    attackers = generate(spec, seed=0).attackers
    observed = [row.asn for row in attackers]
    assert observed == [100, 200, 100, 200, 100]
|
||||
|
||||
|
||||
def test_sessions_distribute_round_robin_across_rotated_rows() -> None:
    """Nine sessions over three rotated rows land three per row.

    Every observation row is expected to carry its own share of the
    session timeline, so the sorted per-row counts must be ``[3, 3, 3]``.
    """
    rotating_actor = {
        "id": "a-1",
        "ip_pool": "rotating",
        "rotation_count": 3,
        "ja3": "x",
        "hassh": "y",
    }
    delivery_phase = {
        "name": "delivery",
        "actor": "a-1",
        "target_selector": {"count": 9},
    }
    spec = {
        "campaign": {
            "id": "c-rr",
            "actors": [rotating_actor],
            "phases": [delivery_phase],
            "duration_days": 1,
        }
    }
    corpus = generate(spec, seed=0)
    per_row = [len(row.sessions) for row in corpus.attackers]
    per_row.sort()
    assert per_row == [3, 3, 3]
|
||||
|
||||
|
||||
def test_truth_labels_at_identity_level() -> None:
    """``truth_labels(level="identity")`` labels every row with one identity.

    Four rotated rows for a single DSL actor must produce four label
    entries whose values all coincide.
    """
    rotating_actor = {
        "id": "a-1",
        "ip_pool": "rotating",
        "rotation_count": 4,
        "ja3": "x",
        "hassh": "y",
    }
    delivery_phase = {
        "name": "delivery",
        "actor": "a-1",
        "target_selector": {"count": 4},
    }
    spec = {
        "campaign": {
            "id": "c-rot",
            "actors": [rotating_actor],
            "phases": [delivery_phase],
            "duration_days": 1,
        }
    }
    labels = generate(spec, seed=0).truth_labels(level="identity")
    # One entry per attacker row.
    assert len(labels) == 4
    # All four rows map to the same identity label.
    distinct_identities = set(labels.values())
    assert len(distinct_identities) == 1
|
||||
|
||||
|
||||
def test_truth_labels_unknown_level_raises() -> None:
    """An unrecognised ``level`` argument is rejected with ``ValueError``."""
    corpus = generate(_minimal_spec(), seed=0)
    expected_message = "unknown truth-label level"
    with pytest.raises(ValueError, match=expected_message):
        corpus.truth_labels(level="campaign-but-spelled-wrong")
|
||||
|
||||
|
||||
def test_sticky_default_unchanged_back_compat() -> None:
    """The minimal (sticky) spec still yields a single labelled row.

    Checks that exactly one attacker row is generated, that it has a
    non-empty ``truth_identity_id``, and that ``truth_labels()`` called
    without arguments reports the campaign id.
    """
    corpus = generate(_minimal_spec(), seed=0)
    attackers = corpus.attackers
    assert len(attackers) == 1
    sole_row = attackers[0]
    assert sole_row.truth_identity_id != ""
    # Default truth_labels still returns campaign labels.
    default_labels = corpus.truth_labels()
    assert set(default_labels.values()) == {"c-test"}
|
||||
|
||||
|
||||
def test_rotated_sessions_carry_identity_label() -> None:
    """Every session's ``truth_identity_id`` equals its parent attacker's."""
    rotating_actor = {
        "id": "a-1",
        "ip_pool": "rotating",
        "rotation_count": 3,
        "ja3": "x",
        "hassh": "y",
    }
    delivery_phase = {
        "name": "delivery",
        "actor": "a-1",
        "target_selector": {"count": 6},
    }
    spec = {
        "campaign": {
            "id": "c-rot",
            "actors": [rotating_actor],
            "phases": [delivery_phase],
            "duration_days": 1,
        }
    }
    corpus = generate(spec, seed=0)
    # Index parent rows by attacker_id so each session can find its owner.
    parents = {attacker.attacker_id: attacker for attacker in corpus.attackers}
    for session in corpus.sessions:
        parent = parents[session.attacker_id]
        assert session.truth_identity_id == parent.truth_identity_id
|
||||
344
tests/clustering/test_campaign_similarity.py
Normal file
344
tests/clustering/test_campaign_similarity.py
Normal file
@@ -0,0 +1,344 @@
|
||||
"""Tests for campaign-level similarity primitives.
|
||||
|
||||
Covers, in order:
|
||||
|
||||
* Each edge family in isolation — phase-handoff, shared-infra,
|
||||
temporal-overlap, cohort.
|
||||
* The F7 (slow_burn) time-agnostic invariant — shifting every
|
||||
timestamp on both sides by the same Δ preserves every edge weight.
|
||||
* The F1 (shared_wordlist) failure mode — shared cohort alone must
|
||||
NOT push a pair over threshold.
|
||||
* The F5 (multi_operator) target — phase-handoff alone (the
|
||||
load-bearing campaign-level signal) DOES cross threshold.
|
||||
* Tier-combination arithmetic — shared-infra + temporal overlap
|
||||
(the canonical co-op pattern) crosses threshold; shared-infra +
|
||||
cohort does not.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.clustering.campaign.impl.similarity import (
|
||||
CAMPAIGN_EDGE_THRESHOLD,
|
||||
DEFAULT_HANDOFF_WINDOW_S,
|
||||
IdentityFeatures,
|
||||
cohort_weight,
|
||||
combined_campaign_weight,
|
||||
phase_handoff_weight,
|
||||
shared_infra_weight,
|
||||
temporal_overlap_weight,
|
||||
)
|
||||
|
||||
|
||||
def _features(uuid: str, **kwargs) -> IdentityFeatures:
    """Build an ``IdentityFeatures`` for *uuid*, forwarding any extra fields."""
    features = IdentityFeatures(identity_uuid=uuid, **kwargs)
    return features
|
||||
|
||||
|
||||
# ─── phase_handoff_weight ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_phase_handoff_clean_out_to_in_within_window():
    """C2 on a decky followed ten minutes later by discovery scores 1.0."""
    finished_at = 1000.0
    outgoing = _features(
        "a",
        last_phase_per_decky={"d1": "command_and_control"},
        last_seen_per_decky={"d1": finished_at},
    )
    incoming = _features(
        "b",
        first_phase_per_decky={"d1": "discovery"},
        # 10 min later
        first_seen_per_decky={"d1": finished_at + 600.0},
    )
    weight = phase_handoff_weight(outgoing, incoming)
    assert weight == 1.0
|
||||
|
||||
|
||||
def test_phase_handoff_symmetric():
    """The handoff weight is 1.0 regardless of argument order."""
    # "b" finishes on d1 and "a" picks up a minute later.
    finished_at = 5000.0
    finisher = _features(
        "b",
        last_phase_per_decky={"d1": "persistence"},
        last_seen_per_decky={"d1": finished_at},
    )
    successor = _features(
        "a",
        first_phase_per_decky={"d1": "lateral_movement"},
        first_seen_per_decky={"d1": finished_at + 60.0},
    )
    forward = phase_handoff_weight(successor, finisher)
    assert forward == 1.0
    backward = phase_handoff_weight(finisher, successor)
    assert backward == 1.0
|
||||
|
||||
|
||||
def test_phase_handoff_no_decky_overlap():
    """Identities seen on different deckies produce no handoff weight."""
    on_d1 = _features(
        "a",
        last_phase_per_decky={"d1": "command_and_control"},
        last_seen_per_decky={"d1": 1000.0},
    )
    on_d2 = _features(
        "b",
        first_phase_per_decky={"d2": "discovery"},
        first_seen_per_decky={"d2": 1100.0},
    )
    weight = phase_handoff_weight(on_d1, on_d2)
    assert weight == 0.0
|
||||
|
||||
|
||||
def test_phase_handoff_phase_mismatch():
    """A last phase of ``exploitation`` is expected to yield no handoff."""
    # "a" stops on "exploitation"; the test expects that not to count
    # as a handoff-out phase even though the timing would qualify.
    outgoing = _features(
        "a",
        last_phase_per_decky={"d1": "exploitation"},
        last_seen_per_decky={"d1": 1000.0},
    )
    incoming = _features(
        "b",
        first_phase_per_decky={"d1": "discovery"},
        first_seen_per_decky={"d1": 1100.0},
    )
    weight = phase_handoff_weight(outgoing, incoming)
    assert weight == 0.0
|
||||
|
||||
|
||||
def test_phase_handoff_outside_window():
    """A gap one hour beyond the default handoff window scores 0.0."""
    outgoing = _features(
        "a",
        last_phase_per_decky={"d1": "command_and_control"},
        last_seen_per_decky={"d1": 0.0},
    )
    # Way past the 24h default window.
    too_late = DEFAULT_HANDOFF_WINDOW_S + 3600.0
    incoming = _features(
        "b",
        first_phase_per_decky={"d1": "discovery"},
        first_seen_per_decky={"d1": too_late},
    )
    weight = phase_handoff_weight(outgoing, incoming)
    assert weight == 0.0
|
||||
|
||||
|
||||
def test_phase_handoff_negative_gap_rejected():
    """A successor that starts before the predecessor ends scores 0.0."""
    # "b" is first seen at 1000 while "a" is last seen at 2000:
    # that is overlap, not a handoff.
    outgoing = _features(
        "a",
        last_phase_per_decky={"d1": "persistence"},
        last_seen_per_decky={"d1": 2000.0},
    )
    early_starter = _features(
        "b",
        first_phase_per_decky={"d1": "lateral_movement"},
        first_seen_per_decky={"d1": 1000.0},
    )
    weight = phase_handoff_weight(outgoing, early_starter)
    assert weight == 0.0
|
||||
|
||||
|
||||
# ─── shared_infra_weight ─────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_shared_infra_full_overlap():
    """Identical payload, C2 and decky sets give a shared-infra weight of 1.0."""
    common_infra = {
        "payload_hashes": frozenset({"hash-1"}),
        "c2_endpoints": frozenset({"1.2.3.4:443"}),
        "decky_set": frozenset({"d1"}),
    }
    left = _features("a", **common_infra)
    right = _features("b", **common_infra)
    assert shared_infra_weight(left, right) == 1.0
|
||||
|
||||
|
||||
def test_shared_infra_no_overlap():
    """Disjoint payload hashes give a shared-infra weight of 0.0."""
    left = _features("a", payload_hashes=frozenset({"hash-a"}))
    right = _features("b", payload_hashes=frozenset({"hash-b"}))
    weight = shared_infra_weight(left, right)
    assert weight == 0.0
|
||||
|
||||
|
||||
def test_shared_infra_empty_returns_zero():
    """Two identities with no infra fields set score 0.0."""
    bare_left, bare_right = _features("a"), _features("b")
    weight = shared_infra_weight(bare_left, bare_right)
    assert weight == 0.0
|
||||
|
||||
|
||||
# ─── temporal_overlap_weight ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_temporal_overlap_full():
    """Identical session windows give a temporal-overlap weight of 1.0."""
    window = (0.0, 100.0)
    left = _features("a", session_windows=(window,))
    right = _features("b", session_windows=(window,))
    assert temporal_overlap_weight(left, right) == 1.0
|
||||
|
||||
|
||||
def test_temporal_overlap_partial():
    """Windows that share half their span give a weight of about 0.5."""
    left = _features("a", session_windows=((0.0, 100.0),))
    right = _features("b", session_windows=((50.0, 150.0),))
    # The windows intersect on [50, 100]: 50 of the first window's 100 units.
    weight = temporal_overlap_weight(left, right)
    assert weight == pytest.approx(0.5)
|
||||
|
||||
|
||||
def test_temporal_overlap_disjoint():
    """Non-intersecting session windows give a weight of 0.0."""
    earlier = _features("a", session_windows=((0.0, 100.0),))
    later = _features("b", session_windows=((200.0, 300.0),))
    weight = temporal_overlap_weight(earlier, later)
    assert weight == 0.0
|
||||
|
||||
|
||||
def test_temporal_overlap_empty():
    """An identity with no session windows overlaps nothing (weight 0.0)."""
    no_sessions = _features("a")
    with_sessions = _features("b", session_windows=((0.0, 100.0),))
    weight = temporal_overlap_weight(no_sessions, with_sessions)
    assert weight == 0.0
|
||||
|
||||
|
||||
# ─── cohort_weight ───────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_cohort_asn_overlap():
    """Identical single-ASN cohorts give a cohort weight of 1.0."""
    shared_asn = frozenset({64512})
    left = _features("a", asn_cohort=shared_asn)
    right = _features("b", asn_cohort=shared_asn)
    assert cohort_weight(left, right) == 1.0
|
||||
|
||||
|
||||
def test_cohort_disjoint():
    """Different ASN cohorts give a cohort weight of 0.0."""
    left = _features("a", asn_cohort=frozenset({64512}))
    right = _features("b", asn_cohort=frozenset({64513}))
    weight = cohort_weight(left, right)
    assert weight == 0.0
|
||||
|
||||
|
||||
# ─── F7 time-agnostic invariant ──────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_f7_invariant_temporal_overlap_unchanged_under_shift():
    """Shifting both sides' windows by 90 days leaves the overlap weight unchanged.

    This is the fixture-7 (slow_burn) invariant: the weight must depend
    only on the windows' relative positions, so adding the same offset
    to every timestamp on BOTH sides must not change it.
    """
    shift = 90 * 24 * 3600.0

    def moved(windows):
        # Translate every (start, end) pair by the common offset.
        return tuple((start + shift, end + shift) for start, end in windows)

    left = _features("a", session_windows=((0.0, 100.0), (300.0, 400.0)))
    right = _features("b", session_windows=((50.0, 150.0), (350.0, 450.0)))
    baseline = temporal_overlap_weight(left, right)

    left_moved = _features("a", session_windows=moved(left.session_windows))
    right_moved = _features("b", session_windows=moved(right.session_windows))
    shifted_weight = temporal_overlap_weight(left_moved, right_moved)
    assert shifted_weight == pytest.approx(baseline)
|
||||
|
||||
|
||||
def test_f7_invariant_phase_handoff_unchanged_under_shift():
    """A 90-day shift of both timelines keeps the handoff weight at 1.0."""
    outgoing = _features(
        "a",
        last_phase_per_decky={"d1": "command_and_control"},
        last_seen_per_decky={"d1": 1000.0},
    )
    incoming = _features(
        "b",
        first_phase_per_decky={"d1": "discovery"},
        first_seen_per_decky={"d1": 1600.0},
    )
    baseline = phase_handoff_weight(outgoing, incoming)

    shift = 90 * 24 * 3600.0
    outgoing_last_seen = {
        decky: seen + shift for decky, seen in outgoing.last_seen_per_decky.items()
    }
    incoming_first_seen = {
        decky: seen + shift for decky, seen in incoming.first_seen_per_decky.items()
    }
    outgoing_moved = _features(
        "a",
        last_phase_per_decky=dict(outgoing.last_phase_per_decky),
        last_seen_per_decky=outgoing_last_seen,
    )
    incoming_moved = _features(
        "b",
        first_phase_per_decky=dict(incoming.first_phase_per_decky),
        first_seen_per_decky=incoming_first_seen,
    )
    shifted_weight = phase_handoff_weight(outgoing_moved, incoming_moved)
    # Same weight before and after the shift, and that weight is a full handoff.
    assert shifted_weight == baseline
    assert baseline == 1.0
|
||||
|
||||
|
||||
# ─── Combined-weight + threshold semantics ──────────────────────────────────
|
||||
|
||||
|
||||
def test_phase_handoff_alone_crosses_threshold():
    """A phase handoff with no other signal reaches the campaign edge threshold.

    This is the signal the F5 multi_operator fixture relies on.
    """
    outgoing = _features(
        "a",
        last_phase_per_decky={"d1": "persistence"},
        last_seen_per_decky={"d1": 1000.0},
    )
    incoming = _features(
        "b",
        first_phase_per_decky={"d1": "lateral_movement"},
        first_seen_per_decky={"d1": 1100.0},
    )
    combined = combined_campaign_weight(outgoing, incoming)
    assert combined >= CAMPAIGN_EDGE_THRESHOLD
|
||||
|
||||
|
||||
def test_cohort_alone_below_threshold():
    """A shared ASN cohort by itself stays under the campaign edge threshold.

    Campaign-level counterpart of the F2 vpn_hopping case: cohort alone
    is not co-operation.
    """
    same_asn = frozenset({64512})
    left = _features("a", asn_cohort=same_asn)
    right = _features("b", asn_cohort=same_asn)
    combined = combined_campaign_weight(left, right)
    assert combined < CAMPAIGN_EDGE_THRESHOLD
|
||||
|
||||
|
||||
def test_shared_infra_alone_crosses_threshold():
    """Matching payload hash and C2 endpoint alone reach the edge threshold.

    This is F5's intended pass condition.
    """
    common_infra = {
        "payload_hashes": frozenset({"h"}),
        "c2_endpoints": frozenset({"c"}),
    }
    left = _features("a", **common_infra)
    right = _features("b", **common_infra)
    combined = combined_campaign_weight(left, right)
    assert combined >= CAMPAIGN_EDGE_THRESHOLD
|
||||
|
||||
|
||||
def test_decky_overlap_alone_below_threshold():
    """Hitting the same deckies with nothing else shared stays below threshold.

    F1's failure mode: two identities target the same decky set but
    share no payload or C2, and their ASN cohorts differ. Shared
    targeting on a small fleet must not be read as co-operation.
    """
    targeted = frozenset({"d1", "d2"})
    left = _features(
        "a",
        decky_set=targeted,
        asn_cohort=frozenset({64512}),
    )
    right = _features(
        "b",
        decky_set=targeted,
        asn_cohort=frozenset({64513}),
    )
    combined = combined_campaign_weight(left, right)
    assert combined < CAMPAIGN_EDGE_THRESHOLD
|
||||
|
||||
|
||||
def test_combined_invariant_under_shift():
    """The combined campaign weight survives a 90-day shift of both sides.

    End-to-end version of the F7 invariant: handoff timestamps and
    session windows on both identities are translated by the same
    offset, and the combined weight must stay approximately equal.
    """
    shift = 90 * 24 * 3600.0

    def moved_windows(windows):
        # Translate each (start, end) window by the common offset.
        return tuple((start + shift, end + shift) for start, end in windows)

    def moved_times(per_decky):
        # Translate each per-decky timestamp by the common offset.
        return {decky: seen + shift for decky, seen in per_decky.items()}

    outgoing = _features(
        "a",
        last_phase_per_decky={"d1": "persistence"},
        last_seen_per_decky={"d1": 1000.0},
        session_windows=((0.0, 1500.0),),
        payload_hashes=frozenset({"h"}),
    )
    incoming = _features(
        "b",
        first_phase_per_decky={"d1": "discovery"},
        first_seen_per_decky={"d1": 1100.0},
        session_windows=((1100.0, 2000.0),),
        payload_hashes=frozenset({"h"}),
    )
    baseline = combined_campaign_weight(outgoing, incoming)

    outgoing_moved = IdentityFeatures(
        identity_uuid=outgoing.identity_uuid,
        last_phase_per_decky=dict(outgoing.last_phase_per_decky),
        last_seen_per_decky=moved_times(outgoing.last_seen_per_decky),
        session_windows=moved_windows(outgoing.session_windows),
        payload_hashes=outgoing.payload_hashes,
    )
    incoming_moved = IdentityFeatures(
        identity_uuid=incoming.identity_uuid,
        first_phase_per_decky=dict(incoming.first_phase_per_decky),
        first_seen_per_decky=moved_times(incoming.first_seen_per_decky),
        session_windows=moved_windows(incoming.session_windows),
        payload_hashes=incoming.payload_hashes,
    )
    shifted_weight = combined_campaign_weight(outgoing_moved, incoming_moved)
    assert shifted_weight == pytest.approx(baseline)
|
||||
357
tests/clustering/test_campaign_worker.py
Normal file
357
tests/clustering/test_campaign_worker.py
Normal file
@@ -0,0 +1,357 @@
|
||||
"""End-to-end tests for the campaign-clusterer worker shell + tick.
|
||||
|
||||
Mirrors :mod:`tests.clustering.test_clusterer_worker` for the layer
|
||||
above. Covers shell lifecycle (shutdown / cancel / raising tick),
|
||||
end-to-end ``tick`` against SQLite (form, link, merge, revoke), bus
|
||||
fan-out to the four ``campaign.*`` topics + cross-family
|
||||
``identity.campaign.assigned``, factory dispatch, and CLI gating.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.bus import topics as _topics
|
||||
from decnet.clustering.campaign.base import (
|
||||
CampaignClusterer,
|
||||
CampaignClusterResult,
|
||||
)
|
||||
from decnet.clustering.campaign.factory import get_campaign_clusterer
|
||||
from decnet.clustering.campaign.impl.connected_components import (
|
||||
ConnectedComponentsCampaignClusterer,
|
||||
cluster_identities,
|
||||
from_identity_row,
|
||||
)
|
||||
from decnet.clustering.campaign.impl.similarity import IdentityFeatures
|
||||
from decnet.clustering.campaign.worker import run_campaign_clusterer_loop
|
||||
from decnet.web.db.factory import get_repository
|
||||
|
||||
|
||||
@pytest.fixture
async def repo(tmp_path):
    """Provide an initialized repository backed by a per-test database file."""
    db_file = tmp_path / "campaign.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _no_bus(monkeypatch):
    """Disable the bus for every test so workers never open a real Unix socket."""
    # Workers then run in poll-only mode.
    monkeypatch.setenv("DECNET_BUS_ENABLED", "false")
|
||||
|
||||
|
||||
# ─── Test doubles ───────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class _FakeClusterer(CampaignClusterer):
    """Test double that replays canned results and counts ``tick`` calls.

    Queued results are returned in order, one per tick; once the queue
    is exhausted every tick returns an empty ``CampaignClusterResult``.
    """

    name = "fake"

    def __init__(self, results=None) -> None:
        # Copy so the caller's list is never consumed by pop().
        self._results = list(results) if results else []
        self.calls = 0

    async def tick(self, repo) -> CampaignClusterResult:
        self.calls += 1
        if not self._results:
            return CampaignClusterResult()
        return self._results.pop(0)
|
||||
|
||||
|
||||
class _RaisingClusterer(CampaignClusterer):
    """Test double whose ``tick`` always fails, after counting the call."""

    name = "raising"

    def __init__(self) -> None:
        self.calls = 0

    async def tick(self, repo) -> CampaignClusterResult:
        # Count first so tests can see how often the loop retried.
        self.calls += 1
        raise RuntimeError("boom")
|
||||
|
||||
|
||||
# ─── Shell lifecycle ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_loop_exits_on_shutdown(repo):
    """Setting the shutdown event ends the loop after at least one tick."""
    stop = asyncio.Event()
    fake = _FakeClusterer()
    loop_coro = run_campaign_clusterer_loop(
        repo,
        poll_interval_secs=0.05,
        clusterer=fake,
        shutdown=stop,
    )
    worker = asyncio.create_task(loop_coro)
    # Give the loop time to run before asking it to stop.
    await asyncio.sleep(0.12)
    stop.set()
    await asyncio.wait_for(worker, timeout=2.0)
    assert fake.calls >= 1
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_loop_exits_on_cancel(repo):
    """Cancelling the loop task lets it finish within the timeout."""
    fake = _FakeClusterer()
    loop_coro = run_campaign_clusterer_loop(
        repo,
        poll_interval_secs=0.05,
        clusterer=fake,
    )
    worker = asyncio.create_task(loop_coro)
    await asyncio.sleep(0.1)
    worker.cancel()
    await asyncio.wait_for(worker, timeout=2.0)
    assert fake.calls >= 1
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_tick_failure_does_not_crash_loop(repo):
    """A clusterer that raises on every tick is still called repeatedly."""
    stop = asyncio.Event()
    failing = _RaisingClusterer()
    loop_coro = run_campaign_clusterer_loop(
        repo,
        poll_interval_secs=0.05,
        clusterer=failing,
        shutdown=stop,
    )
    worker = asyncio.create_task(loop_coro)
    # Long enough for several poll intervals to elapse.
    await asyncio.sleep(0.2)
    stop.set()
    await asyncio.wait_for(worker, timeout=2.0)
    # Two or more calls prove the loop kept going after the first failure.
    assert failing.calls >= 2
|
||||
|
||||
|
||||
# ─── Bus fan-out ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_publishes_campaign_result_on_bus(monkeypatch, repo):
    """One tick's result fans out to the campaign topics and the cross-family topic.

    ``publish_safely`` in the worker module is replaced by a recorder.
    The canned result carries one entry of each kind (formed, assigned,
    merged, unmerged), and the test checks that each corresponding
    ``campaign.*`` topic was published, plus that
    ``identity.campaign.assigned`` was published for every identity in
    ``campaigns_formed`` and ``identities_assigned``.
    """
    published: list[tuple[str, dict, str]] = []

    async def _fake_publish(bus, topic, payload, event_type=""):
        # Record instead of touching a real bus.
        published.append((topic, payload, event_type))

    monkeypatch.setattr(
        "decnet.clustering.campaign.worker.publish_safely",
        _fake_publish,
    )

    canned = CampaignClusterResult(
        campaigns_formed=[
            {"campaign_uuid": "c-1", "identity_uuids": ["i-1", "i-2"]},
        ],
        identities_assigned=[
            {
                "campaign_uuid": "c-1",
                "identity_uuid": "i-3",
                "prior_campaign_uuid": None,
            },
        ],
        campaigns_merged=[
            {"winner_uuid": "c-1", "loser_uuid": "c-2"},
        ],
        campaigns_unmerged=[
            {"resurrected_uuid": "c-2", "former_winner_uuid": "c-1"},
        ],
    )
    fake = _FakeClusterer(results=[canned])

    stop = asyncio.Event()
    loop_coro = run_campaign_clusterer_loop(
        repo,
        poll_interval_secs=0.05,
        clusterer=fake,
        shutdown=stop,
    )
    worker = asyncio.create_task(loop_coro)
    await asyncio.sleep(0.1)
    stop.set()
    await asyncio.wait_for(worker, timeout=2.0)

    topics_seen = {topic for topic, _, _ in published}
    assert _topics.campaign(_topics.CAMPAIGN_FORMED) in topics_seen
    assert _topics.campaign(_topics.CAMPAIGN_IDENTITY_ASSIGNED) in topics_seen
    assert _topics.campaign(_topics.CAMPAIGN_MERGED) in topics_seen
    assert _topics.campaign(_topics.CAMPAIGN_UNMERGED) in topics_seen

    # Cross-family signal: identities from campaigns_formed (i-1, i-2)
    # and from identities_assigned (i-3) must all have been announced.
    cross_topic = _topics.identity(_topics.IDENTITY_CAMPAIGN_ASSIGNED)
    announced = {
        payload["identity_uuid"]
        for topic, payload, _ in published
        if topic == cross_topic
    }
    assert {"i-1", "i-2", "i-3"}.issubset(announced)
|
||||
|
||||
|
||||
# ─── Pure clusterer + projection ────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_cluster_identities_singletons():
    """Two feature-less identities end up with different cluster labels."""
    lone_a = IdentityFeatures(identity_uuid="a")
    lone_b = IdentityFeatures(identity_uuid="b")
    assignment = cluster_identities([lone_a, lone_b])
    assert assignment["a"] != assignment["b"]
|
||||
|
||||
|
||||
def test_cluster_identities_phase_handoff_unions():
    """A C2-to-discovery handoff on one decky puts both identities in one cluster."""
    outgoing = IdentityFeatures(
        identity_uuid="a",
        last_phase_per_decky={"d1": "command_and_control"},
        last_seen_per_decky={"d1": 1000.0},
    )
    incoming = IdentityFeatures(
        identity_uuid="b",
        first_phase_per_decky={"d1": "discovery"},
        first_seen_per_decky={"d1": 1100.0},
    )
    assignment = cluster_identities([outgoing, incoming])
    assert assignment["a"] == assignment["b"]
|
||||
|
||||
|
||||
def test_from_identity_row_parses_json_lists():
    """JSON-encoded list columns are decoded into frozensets on the features."""
    row = {
        "uuid": "i-1",
        "payload_simhashes": json.dumps(["h1", "h2"]),
        "c2_endpoints": json.dumps(["c1"]),
    }
    features = from_identity_row(row)
    assert features.identity_uuid == "i-1"
    assert features.payload_hashes == frozenset({"h1", "h2"})
    assert features.c2_endpoints == frozenset({"c1"})
|
||||
|
||||
|
||||
def test_from_identity_row_handles_null_and_garbage():
    """A ``None`` column and a non-JSON string both decode to an empty frozenset."""
    row = {
        "uuid": "i-1",
        "payload_simhashes": None,
        "c2_endpoints": "not-json",
    }
    features = from_identity_row(row)
    nothing = frozenset()
    assert features.payload_hashes == nothing
    assert features.c2_endpoints == nothing
|
||||
|
||||
|
||||
# ─── End-to-end tick against SQLite ────────────────────────────────────────
|
||||
|
||||
|
||||
async def _create_identity(repo, uuid: str, **kwargs) -> str:
    """Insert an attacker-identity row and return the repository's result.

    ``first_seen_at`` and ``last_seen_at`` are both set to the current
    UTC time. Only ``payload_simhashes`` and ``c2_endpoints`` are read
    from *kwargs*; each defaults to ``None`` when absent.
    """
    timestamp = datetime.now(timezone.utc)
    row = {
        "uuid": uuid,
        "first_seen_at": timestamp,
        "last_seen_at": timestamp,
        "payload_simhashes": kwargs.get("payload_simhashes"),
        "c2_endpoints": kwargs.get("c2_endpoints"),
    }
    return await repo.create_attacker_identity(row)
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_tick_empty_db_returns_empty_result(repo):
    """A tick against an empty database reports nothing in any result list."""
    clusterer = ConnectedComponentsCampaignClusterer()
    outcome = await clusterer.tick(repo)
    result_lists = (
        outcome.campaigns_formed,
        outcome.identities_assigned,
        outcome.campaigns_merged,
        outcome.campaigns_unmerged,
    )
    for entries in result_lists:
        assert entries == []
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_tick_forms_campaign_for_shared_infra_co_op(repo):
    """Two stored identities with the same payload and C2 form one campaign.

    Exercises the F5-style co-op pattern end to end: both identities
    are written to the repository with identical JSON-encoded
    ``payload_simhashes`` and ``c2_endpoints`` columns, and a single
    tick is expected to report one formed campaign containing both.
    """
    shared_payloads = json.dumps(["h1"])
    shared_c2 = json.dumps(["c1"])
    for identity_uuid in ("i1", "i2"):
        await _create_identity(
            repo,
            identity_uuid,
            payload_simhashes=shared_payloads,
            c2_endpoints=shared_c2,
        )

    clusterer = ConnectedComponentsCampaignClusterer()
    outcome = await clusterer.tick(repo)

    formed = outcome.campaigns_formed
    assert len(formed) == 1
    members = set(formed[0]["identity_uuids"])
    assert members == {"i1", "i2"}
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_tick_keeps_distinct_payloads_separate(repo):
    """Identities sharing neither payload nor C2 each get their own campaign."""
    unrelated = (
        ("i1", ["h1"], ["c1"]),
        ("i2", ["h2"], ["c2"]),
    )
    for identity_uuid, payloads, endpoints in unrelated:
        await _create_identity(
            repo,
            identity_uuid,
            payload_simhashes=json.dumps(payloads),
            c2_endpoints=json.dumps(endpoints),
        )

    clusterer = ConnectedComponentsCampaignClusterer()
    outcome = await clusterer.tick(repo)

    assert len(outcome.campaigns_formed) == 2
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_tick_idempotent_links_existing_identity(repo):
    """A second tick over unchanged data forms and assigns nothing new."""
    await _create_identity(repo, "i1")
    clusterer = ConnectedComponentsCampaignClusterer()

    first = await clusterer.tick(repo)
    assert len(first.campaigns_formed) == 1
    formed_uuid = first.campaigns_formed[0]["campaign_uuid"]

    second = await clusterer.tick(repo)
    # The identity is already linked, so nothing new is reported.
    assert second.campaigns_formed == []
    assert second.identities_assigned == []
    # The link created by the first tick is still there.
    linked = await repo.count_identities_for_campaign(formed_uuid)
    assert linked == 1
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_tick_skips_merged_out_identities(repo):
    """An identity merged into another is not clustered as its own input."""
    for identity_uuid in ("i1", "i2"):
        await _create_identity(repo, identity_uuid)
    # Soft-merge i2 into i1 at the identity layer.
    await repo.update_identity_merged_into("i2", "i1")

    clusterer = ConnectedComponentsCampaignClusterer()
    outcome = await clusterer.tick(repo)

    # Only i1 remains active: one campaign, holding just that identity.
    formed = outcome.campaigns_formed
    assert len(formed) == 1
    assert formed[0]["identity_uuids"] == ["i1"]
|
||||
|
||||
|
||||
# ─── Factory + CLI gating ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_factory_default(monkeypatch):
    """With no type configured, the factory returns the connected-components clusterer."""
    # Clear any ambient override so the test exercises the real default
    # instead of whatever the developer's or CI's environment exports.
    monkeypatch.delenv("DECNET_CAMPAIGN_CLUSTERER_TYPE", raising=False)
    c = get_campaign_clusterer()
    assert isinstance(c, ConnectedComponentsCampaignClusterer)
|
||||
|
||||
|
||||
def test_factory_unknown_raises(monkeypatch):
    """An unrecognised clusterer type in the environment raises ``ValueError``."""
    env_var = "DECNET_CAMPAIGN_CLUSTERER_TYPE"
    monkeypatch.setenv(env_var, "nope")
    with pytest.raises(ValueError):
        get_campaign_clusterer()
|
||||
|
||||
|
||||
def test_campaign_clusterer_registered_in_cli():
    """``campaign-clusterer`` is listed among the master-only CLI commands."""
    from decnet.cli.gating import MASTER_ONLY_COMMANDS

    command = "campaign-clusterer"
    assert command in MASTER_ONLY_COMMANDS
|
||||
|
||||
|
||||
def test_campaign_topic_builder_round_trips():
    """The topic builders produce the expected dotted topic strings."""
    formed = _topics.campaign(_topics.CAMPAIGN_FORMED)
    assert formed == "campaign.formed"

    assigned = _topics.campaign(_topics.CAMPAIGN_IDENTITY_ASSIGNED)
    assert assigned == "campaign.identity.assigned"

    cross_family = _topics.identity(_topics.IDENTITY_CAMPAIGN_ASSIGNED)
    assert cross_family == "identity.campaign.assigned"
|
||||
34
tests/clustering/test_clusterer_factory.py
Normal file
34
tests/clustering/test_clusterer_factory.py
Normal file
@@ -0,0 +1,34 @@
|
||||
"""Tests for :mod:`decnet.clustering.factory`."""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.clustering.base import Clusterer
|
||||
from decnet.clustering.factory import get_clusterer
|
||||
from decnet.clustering.impl.connected_components import ConnectedComponentsClusterer
|
||||
|
||||
|
||||
def test_default_returns_connected_components(monkeypatch):
    """With the env var unset, the factory yields the connected-components clusterer."""
    monkeypatch.delenv("DECNET_CLUSTERER_TYPE", raising=False)
    clusterer = get_clusterer()
    # Check the concrete type, the base interface, and the advertised name.
    for expected_type in (ConnectedComponentsClusterer, Clusterer):
        assert isinstance(clusterer, expected_type)
    assert clusterer.name == "connected_components"
|
||||
|
||||
|
||||
def test_explicit_connected_components(monkeypatch):
    """Naming ``connected_components`` explicitly selects that clusterer."""
    monkeypatch.setenv("DECNET_CLUSTERER_TYPE", "connected_components")
    clusterer = get_clusterer()
    assert isinstance(clusterer, ConnectedComponentsClusterer)
|
||||
|
||||
|
||||
def test_unknown_clusterer_type_raises(monkeypatch):
    """An unrecognised type raises ``ValueError`` mentioning "Unknown clusterer"."""
    monkeypatch.setenv("DECNET_CLUSTERER_TYPE", "nope")
    expected_message = "Unknown clusterer"
    with pytest.raises(ValueError, match=expected_message):
        get_clusterer()
|
||||
|
||||
|
||||
def test_case_insensitive(monkeypatch):
    """An upper-case, space-padded type name still selects the clusterer."""
    padded_upper = " CONNECTED_COMPONENTS "
    monkeypatch.setenv("DECNET_CLUSTERER_TYPE", padded_upper)
    clusterer = get_clusterer()
    assert isinstance(clusterer, ConnectedComponentsClusterer)
|
||||
182
tests/clustering/test_clusterer_worker.py
Normal file
182
tests/clustering/test_clusterer_worker.py
Normal file
@@ -0,0 +1,182 @@
|
||||
"""End-to-end tests for the clusterer worker shell.
|
||||
|
||||
The skeleton clusterer is a no-op; these tests cover the shell:
|
||||
|
||||
* exits cleanly on shutdown signal (and via cancel)
|
||||
* invokes ``tick`` on each loop iteration
|
||||
* publishes :class:`ClusterResult` side-effects on the right topics
|
||||
* a clusterer raising from ``tick`` is logged and does not crash the loop
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.bus import topics as _topics
|
||||
from decnet.clustering.base import Clusterer, ClusterResult
|
||||
from decnet.clustering.impl.connected_components import ConnectedComponentsClusterer
|
||||
from decnet.clustering.worker import run_clusterer_loop
|
||||
from decnet.web.db.factory import get_repository
|
||||
|
||||
|
||||
@pytest.fixture
async def repo(tmp_path):
    """Yield-free fixture: an initialized repository stored in ``tmp_path``."""
    db_file = tmp_path / "clusterer.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _no_bus(monkeypatch):
    """Disable the bus for every test in this module.

    Sets ``DECNET_BUS_ENABLED=false`` so workers run in poll-only mode
    and never open a real Unix socket.
    """
    monkeypatch.setenv("DECNET_BUS_ENABLED", "false")
|
||||
|
||||
|
||||
class _FakeClusterer(Clusterer):
    """Test double that replays canned :class:`ClusterResult` objects.

    Each ``tick`` hands back the next queued result; once the queue is
    drained it returns an empty ``ClusterResult``. ``calls`` counts how
    many times ``tick`` ran.
    """

    name = "fake"

    def __init__(self, results: list[ClusterResult] | None = None) -> None:
        # Copy the caller's list so popping never mutates their object.
        self._results = list(results) if results else []
        self.calls = 0

    async def tick(self, repo) -> ClusterResult:
        self.calls += 1
        if not self._results:
            return ClusterResult()
        return self._results.pop(0)
|
||||
|
||||
|
||||
class _RaisingClusterer(Clusterer):
    """Test double whose ``tick`` always fails, after counting the call."""

    name = "raising"

    def __init__(self) -> None:
        self.calls = 0

    async def tick(self, repo) -> ClusterResult:
        # Count first so tests can see the loop kept calling us.
        self.calls += 1
        raise RuntimeError("boom")
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_loop_exits_on_shutdown_signal(repo):
    """Setting the shutdown event ends the loop after at least one tick."""
    stop = asyncio.Event()
    fake = _FakeClusterer()
    loop_coro = run_clusterer_loop(
        repo,
        poll_interval_secs=0.05,
        clusterer=fake,
        shutdown=stop,
    )
    worker = asyncio.create_task(loop_coro)

    # Give the loop time for a couple of poll intervals before stopping it.
    await asyncio.sleep(0.12)
    stop.set()
    await asyncio.wait_for(worker, timeout=2.0)

    assert fake.calls >= 1
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_loop_exits_on_cancel(repo):
    """Cancelling the task ends the loop without propagating the cancellation."""
    fake = _FakeClusterer()
    loop_coro = run_clusterer_loop(
        repo,
        poll_interval_secs=0.05,
        clusterer=fake,
    )
    worker = asyncio.create_task(loop_coro)

    await asyncio.sleep(0.1)
    worker.cancel()

    # The loop is expected to catch CancelledError and return normally
    # (mirroring the intel + reuse worker shells), so awaiting the task
    # must not raise.
    await asyncio.wait_for(worker, timeout=2.0)

    assert fake.calls >= 1
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_tick_failure_does_not_crash_loop(repo):
    """An exception raised by ``tick`` must not escape the worker loop."""
    stop = asyncio.Event()
    failing = _RaisingClusterer()
    loop_coro = run_clusterer_loop(
        repo,
        poll_interval_secs=0.05,
        clusterer=failing,
        shutdown=stop,
    )
    worker = asyncio.create_task(loop_coro)

    await asyncio.sleep(0.2)
    stop.set()
    await asyncio.wait_for(worker, timeout=2.0)

    # More than one call proves the loop survived the first raise.
    assert failing.calls >= 2
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_skeleton_clusterer_returns_empty_result(repo):
    """Ticking against a freshly initialized repo reports no side-effects."""
    outcome = await ConnectedComponentsClusterer().tick(repo)

    for field_name in (
        "identities_formed",
        "observations_linked",
        "identities_merged",
        "identities_unmerged",
    ):
        assert getattr(outcome, field_name) == []
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_publishes_cluster_result_on_bus(monkeypatch, repo):
    """Each populated list in a ClusterResult produces a publish on its topic."""
    captured: list[tuple[str, dict, str]] = []

    # Parameter names are kept as-is in case the worker passes them by keyword.
    async def _record_publish(bus, topic, payload, event_type=""):
        captured.append((topic, payload, event_type))

    monkeypatch.setattr(
        "decnet.clustering.worker.publish_safely", _record_publish,
    )

    # One entry per side-effect kind, returned by the fake on its first tick.
    canned = ClusterResult(
        identities_formed=[
            {"identity_uuid": "id-1", "observation_uuids": ["obs-1", "obs-2"]},
        ],
        observations_linked=[
            {"identity_uuid": "id-1", "observation_uuid": "obs-3"},
        ],
        identities_merged=[
            {"winner_uuid": "id-1", "loser_uuid": "id-2"},
        ],
        identities_unmerged=[
            {"resurrected_uuid": "id-2", "former_winner_uuid": "id-1"},
        ],
    )
    fake = _FakeClusterer(results=[canned])

    stop = asyncio.Event()
    worker = asyncio.create_task(
        run_clusterer_loop(
            repo,
            poll_interval_secs=0.05,
            clusterer=fake,
            shutdown=stop,
        )
    )
    await asyncio.sleep(0.1)
    stop.set()
    await asyncio.wait_for(worker, timeout=2.0)

    topics_seen = {topic for topic, _payload, _event_type in captured}
    for event_name in (
        _topics.IDENTITY_FORMED,
        _topics.IDENTITY_OBSERVATION_LINKED,
        _topics.IDENTITY_MERGED,
        _topics.IDENTITY_UNMERGED,
    ):
        assert _topics.identity(event_name) in topics_seen
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_clusterer_registered_in_cli():
    """The ``clusterer`` command name appears in the master-only command set."""
    from decnet.cli.gating import MASTER_ONLY_COMMANDS

    command = "clusterer"
    assert command in MASTER_ONLY_COMMANDS
|
||||
808
tests/clustering/test_connected_components.py
Normal file
808
tests/clustering/test_connected_components.py
Normal file
@@ -0,0 +1,808 @@
|
||||
"""Tests for the connected-components clusterer (commit 4 — high-weight edges).
|
||||
|
||||
Covers, in order:
|
||||
|
||||
* The pure ``cluster_observations`` algorithm — singletons stay
|
||||
isolated, exact-match high-weight signals fold them together,
|
||||
un-fingerprinted observations stay un-mergeable.
|
||||
* The production-row adapter ``from_attacker_row`` — JA3 / HASSH
|
||||
recovered from the fingerprints JSON; absent fields project to
|
||||
``None``.
|
||||
* End-to-end ``tick`` against a real SQLite repo: seeded attackers
|
||||
with shared / divergent fingerprints get the right identity rows
|
||||
written and the right ``identity_id`` links set.
|
||||
* Fixture-bound assertions, including lone_wolf (pure singletons),
|
||||
shared_wordlist (no fingerprint signal — singletons), and
|
||||
vpn_hopping at identity-level (one identity from 5 rotated IPs
|
||||
via shared JA3 + HASSH).
|
||||
|
||||
The tick is bus-free here — the worker shell tests cover bus fan-out
|
||||
separately. We're validating the algorithm + DB writes here.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.clustering.impl.connected_components import (
|
||||
ConnectedComponentsClusterer,
|
||||
cluster_observations,
|
||||
from_attacker_row,
|
||||
)
|
||||
from decnet.clustering.impl.similarity import Observation, from_synthetic
|
||||
from decnet.web.db.factory import get_repository
|
||||
|
||||
FIXTURE_DIR = Path(__file__).parent.parent / "fixtures" / "campaigns"
|
||||
|
||||
|
||||
# ─── pure algorithm ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _obs(obs_id: str, **kwargs) -> Observation:
    """Build an ``Observation`` with the given id; other fields come from ``kwargs``."""
    observation = Observation(observation_id=obs_id, **kwargs)
    return observation
|
||||
|
||||
|
||||
def test_cluster_observations_singletons_stay_isolated():
    """Distinct JA3s plus one un-fingerprinted observation yield three distinct labels."""
    observations = [
        _obs("a", ja3="ja3-a"),
        _obs("b", ja3="ja3-b"),
        _obs("c"),  # no fingerprint
    ]

    labels = cluster_observations(observations)

    # Three different labels <=> every pair differs.
    assert len({labels["a"], labels["b"], labels["c"]}) == 3
|
||||
|
||||
|
||||
def test_cluster_observations_ja3_match_unions():
    """Observations with an identical JA3 share a label; a different JA3 does not."""
    shared_one = _obs("a", ja3="ja3-shared")
    shared_two = _obs("b", ja3="ja3-shared")
    outsider = _obs("c", ja3="ja3-other")

    labels = cluster_observations([shared_one, shared_two, outsider])

    assert labels["a"] == labels["b"]
    assert labels["a"] != labels["c"]
|
||||
|
||||
|
||||
def test_cluster_observations_unfingerprinted_stay_separate():
    """Signal-less observations must not share a cluster.

    Collapsing them would fuse every noise scanner into one cluster.
    """
    labels = cluster_observations([_obs("a"), _obs("b")])

    assert labels["a"] != labels["b"]
|
||||
|
||||
|
||||
def test_cluster_observations_transitive_via_payload():
    """A↔B via JA3 and B↔C via payload hash puts A, B and C in one component."""
    payload = frozenset({"pl-1"})
    chain = [
        _obs("a", ja3="ja3-x"),
        _obs("b", ja3="ja3-x", payload_hashes=payload),  # the bridging node
        _obs("c", payload_hashes=payload),
    ]

    labels = cluster_observations(chain)

    assert labels["a"] == labels["b"] == labels["c"]
|
||||
|
||||
|
||||
def test_cluster_observations_empty_input():
    """No observations in, empty label mapping out."""
    labels = cluster_observations([])
    assert labels == {}
|
||||
|
||||
|
||||
def test_cluster_observations_deterministic():
    """Clustering the same input twice gives equal labels (fixture stability depends on it)."""
    observations = [_obs("a", ja3="x"), _obs("b", ja3="x"), _obs("c")]

    first_run = cluster_observations(observations)
    second_run = cluster_observations(observations)

    assert first_run == second_run
|
||||
|
||||
|
||||
# ─── production-row adapter ────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_from_attacker_row_extracts_ja3_and_hassh():
    """JA3, HASSH, uuid and ASN are projected from an attacker row onto the observation."""
    fingerprint_entries = [
        {"kind": "ja3", "hash": "ja3-abc"},
        {"kind": "hassh", "hash": "hassh-def"},
        {"kind": "jarm", "hash": "jarm-ghi"},  # not used in v1
    ]
    row = {
        "uuid": "att-1",
        "asn": 64500,
        "identity_id": None,
        "fingerprints": json.dumps(fingerprint_entries),
    }

    observation = from_attacker_row(row)

    assert observation.observation_id == "att-1"
    assert observation.ja3 == "ja3-abc"
    assert observation.hassh == "hassh-def"
    assert observation.asn == 64500
|
||||
|
||||
|
||||
def test_from_attacker_row_handles_empty_fingerprints():
    """An empty fingerprints list and NULL ASN project to ``None`` fields."""
    row = {
        "uuid": "att-2",
        "asn": None,
        "identity_id": None,
        "fingerprints": "[]",
    }

    observation = from_attacker_row(row)

    assert observation.ja3 is None
    assert observation.hassh is None
    assert observation.asn is None
|
||||
|
||||
|
||||
def test_from_attacker_row_handles_malformed_json():
    """Unparseable fingerprints JSON does not raise; JA3 and HASSH come back ``None``."""
    row = {
        "uuid": "att-3",
        "asn": None,
        "identity_id": None,
        "fingerprints": "not json",
    }

    observation = from_attacker_row(row)

    assert observation.ja3 is None
    assert observation.hassh is None
|
||||
|
||||
|
||||
# ─── end-to-end tick against SQLite ────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.fixture
async def repo(tmp_path):
    """Provide an initialized repository whose DB file lives under ``tmp_path``."""
    db_file = tmp_path / "clusterer.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
|
||||
|
||||
|
||||
async def _seed_attacker(
    repo, ip: str, *,
    ja3: str | None = None,
    hassh: str | None = None,
    asn: int | None = None,
    cert_sha256: str | None = None,
) -> str:
    """Upsert one attacker row and return whatever ``repo.upsert_attacker`` returns.

    The ``fingerprints`` JSON deliberately mixes two entry shapes:

    * ``{"kind", "hash"}`` entries — the shape read by the clusterer's
      ``from_attacker_row`` (test-fixture shape).
    * ``{"bounty_type", "payload"}`` entries — the production shape that,
      per the original author's note, feeds identity_rollup's
      ``extract_fp_summaries``.

    Keeping both in one list lets a single seed exercise clustering and
    the identity-column rollup. Falsy ``ja3`` / ``hassh`` / ``cert_sha256``
    values contribute no entries.
    """

    def _bounty(payload: dict) -> dict:
        # Production-shaped wrapper around a fingerprint payload.
        return {"bounty_type": "fingerprint", "payload": payload}

    seen_at = datetime.now(timezone.utc)

    entries: list[dict] = []
    if ja3:
        entries += [
            {"kind": "ja3", "hash": ja3},
            _bounty({"fingerprint_type": "ja3", "ja3": ja3}),
        ]
    if hassh:
        entries += [
            {"kind": "hassh", "hash": hassh},
            _bounty({"fingerprint_type": "hassh_server", "hash": hassh}),
        ]
    if cert_sha256:
        # Certificates only exist in the production shape.
        entries.append(
            _bounty({
                "fingerprint_type": "tls_certificate",
                "cert_sha256": cert_sha256,
            })
        )

    attacker = {
        "ip": ip,
        "first_seen": seen_at,
        "last_seen": seen_at,
        "event_count": 1,
        "asn": asn,
        "fingerprints": json.dumps(entries),
    }
    return await repo.upsert_attacker(attacker)
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_tick_on_empty_db_is_noop(repo):
    """With no attackers seeded, a tick forms no identities and links nothing."""
    outcome = await ConnectedComponentsClusterer().tick(repo)

    assert outcome.identities_formed == []
    assert outcome.observations_linked == []
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_tick_clusters_shared_ja3(repo):
    """A JA3 shared by two attackers yields exactly one identity that both link to."""
    first_uuid = await _seed_attacker(repo, "1.1.1.1", ja3="ja3-x", asn=64500)
    second_uuid = await _seed_attacker(repo, "2.2.2.2", ja3="ja3-x", asn=64501)
    members = {first_uuid, second_uuid}

    outcome = await ConnectedComponentsClusterer().tick(repo)

    assert len(outcome.identities_formed) == 1
    formed = outcome.identities_formed[0]
    assert set(formed["observation_uuids"]) == members

    # The identity row was persisted under the reported uuid ...
    identity_uuid = formed["identity_uuid"]
    identity = await repo.get_identity_by_uuid(identity_uuid)
    assert identity is not None
    assert identity["uuid"] == identity_uuid

    # ... and both attacker rows reference it.
    linked_rows = await repo.list_observations_for_identity(identity_uuid)
    assert {row["uuid"] for row in linked_rows} == members
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_tick_keeps_distinct_ja3_separate(repo):
    """Two divergent JA3s with no other shared signal stay separate.

    The tick forms two identities, one per attacker; each identity
    holds exactly one observation (singletons get their own row).
    """
    await _seed_attacker(repo, "1.1.1.1", ja3="ja3-a")
    await _seed_attacker(repo, "2.2.2.2", ja3="ja3-b")

    c = ConnectedComponentsClusterer()
    result = await c.tick(repo)

    # Singletons get identity rows of their own (one observation per cluster).
    assert len(result.identities_formed) == 2
    for formed in result.identities_formed:
        assert len(formed["observation_uuids"]) == 1
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_tick_merges_two_identities_when_component_spans_them(repo):
    """Two existing identities that become one component get soft-merged.

    When observations from two pre-existing identities start clustering
    together (e.g. a previously-missing fingerprint shows up), the
    smaller-uuid identity wins, the loser's ``merged_into_uuid`` points
    at the winner, and observations remain linked to one of the two
    identity rows.
    """
    # Tick 1: two distinct fingerprints → two distinct identities.
    a = await _seed_attacker(repo, "1.1.1.1", ja3="ja3-A")
    b = await _seed_attacker(repo, "2.2.2.2", ja3="ja3-B")

    c = ConnectedComponentsClusterer()
    first = await c.tick(repo)
    assert len(first.identities_formed) == 2

    # Snapshot the two identity uuids; the min-uuid rule picks the winner.
    identities_after_first = await repo.list_all_identities()
    assert len(identities_after_first) == 2
    expected_winner, expected_loser = sorted(
        i["uuid"] for i in identities_after_first
    )

    # Tick 2 setup: connect the two identities through a bridge attacker.
    # The bridge shares JA3 with A and carries a HASSH that B's row is
    # then updated to carry as well, giving the chain A ↔ bridge ↔ B.
    await _seed_attacker(repo, "3.3.3.3", ja3="ja3-A", hassh="hassh-bridge")
    now = datetime.now(timezone.utc)
    await repo.upsert_attacker({
        "ip": "2.2.2.2", "first_seen": now, "last_seen": now,
        "event_count": 1,
        "fingerprints": json.dumps([
            {"kind": "ja3", "hash": "ja3-B"},
            {"kind": "hassh", "hash": "hassh-bridge"},
        ]),
    })

    second = await c.tick(repo)
    assert len(second.identities_merged) == 1
    merge = second.identities_merged[0]
    assert merge["winner_uuid"] == expected_winner
    assert merge["loser_uuid"] == expected_loser

    # The loser's row still exists with merged_into_uuid set.
    all_after = {i["uuid"]: i for i in await repo.list_all_identities()}
    assert all_after[expected_loser]["merged_into_uuid"] == expected_winner
    assert all_after[expected_winner]["merged_into_uuid"] is None

    # The merge is a soft pointer, NOT a re-point: both observations
    # still reference one of the two original identity rows.
    a_row = await repo.get_attacker_by_uuid(a)
    b_row = await repo.get_attacker_by_uuid(b)
    assert a_row["identity_id"] in {expected_winner, expected_loser}
    assert b_row["identity_id"] in {expected_winner, expected_loser}
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_tick_unmerges_when_observations_diverge(repo):
    """A soft-merge is revoked once its observations stop clustering.

    Pre-seeds a soft-merged identity pair, then changes the underlying
    fingerprints so the two observations no longer cluster. The next
    tick must clear ``merged_into_uuid`` and emit ``identities_unmerged``.
    """
    now = datetime.now(timezone.utc)

    # Two attackers with the same JA3 → the first tick forms one identity.
    a = await _seed_attacker(repo, "1.1.1.1", ja3="ja3-shared")
    b = await _seed_attacker(repo, "2.2.2.2", ja3="ja3-shared")
    c = ConnectedComponentsClusterer()
    first = await c.tick(repo)
    assert len(first.identities_formed) == 1
    one_identity_uuid = first.identities_formed[0]["identity_uuid"]

    # Force a soft-merge state: split observation b out into its own
    # identity, then merge the pair via the repo directly. This emulates
    # a state the clusterer would have arrived at across multiple ticks
    # (form, then merge).
    second_uuid = "00000000-0000-0000-0000-00000000bbbb"
    await repo.create_attacker_identity({
        "uuid": second_uuid,
        "schema_version": 1,
        "first_seen_at": now, "last_seen_at": now,
        "created_at": now, "updated_at": now,
        "observation_count": 1,
    })
    await repo.set_attacker_identity_id(b, second_uuid)

    # Merge direction follows the min-uuid rule: the smaller uuid wins,
    # whichever of the two identities that happens to be.
    winner = min(one_identity_uuid, second_uuid)
    loser = max(one_identity_uuid, second_uuid)
    if loser == one_identity_uuid:
        # The tick-formed identity is the loser here, so swap ownership:
        # a moves to the winner and b to the loser, keeping one
        # observation on each side of the merge.
        await repo.set_attacker_identity_id(a, winner)
        await repo.set_attacker_identity_id(b, loser)
    await repo.update_identity_merged_into(loser, winner)

    # Verify the soft-merge is in place.
    pre = {i["uuid"]: i for i in await repo.list_all_identities()}
    assert pre[loser]["merged_into_uuid"] == winner

    # Now change the underlying fingerprints so a and b no longer cluster.
    await repo.upsert_attacker({
        "ip": "2.2.2.2", "first_seen": now, "last_seen": now,
        "event_count": 1,
        "fingerprints": json.dumps([{"kind": "ja3", "hash": "ja3-different"}]),
    })

    # The second tick should detect the divergence and revoke the merge.
    second = await c.tick(repo)
    assert len(second.identities_unmerged) == 1
    unmerged = second.identities_unmerged[0]
    assert unmerged["resurrected_uuid"] == loser
    assert unmerged["former_winner_uuid"] == winner

    post = {i["uuid"]: i for i in await repo.list_all_identities()}
    assert post[loser]["merged_into_uuid"] is None
    assert post[winner]["merged_into_uuid"] is None
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_tick_is_idempotent_under_no_changes(repo):
    """A second tick over unchanged state reports no side-effects at all."""
    for ip, ja3 in (
        ("1.1.1.1", "ja3-x"),
        ("2.2.2.2", "ja3-x"),
        ("3.3.3.3", "ja3-y"),
    ):
        await _seed_attacker(repo, ip, ja3=ja3)

    clusterer = ConnectedComponentsClusterer()
    initial = await clusterer.tick(repo)
    repeat = await clusterer.tick(repo)

    for field_name in (
        "identities_formed",
        "observations_linked",
        "identities_merged",
        "identities_unmerged",
    ):
        assert getattr(repeat, field_name) == []

    # Sanity: the first tick did do something.
    assert initial.identities_formed
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_tick_links_new_observation_to_existing_identity(repo):
    """A late arrival sharing the JA3 is linked to the existing identity, not re-minted.

    Tick 1 clusters two attackers into one identity; tick 2 sees a third
    attacker with the same JA3 and must report it under
    ``observations_linked``.
    """
    await _seed_attacker(repo, "1.1.1.1", ja3="ja3-x")
    await _seed_attacker(repo, "2.2.2.2", ja3="ja3-x")

    clusterer = ConnectedComponentsClusterer()
    initial = await clusterer.tick(repo)
    assert len(initial.identities_formed) == 1
    identity_uuid = initial.identities_formed[0]["identity_uuid"]

    # New observation arrives; same JA3.
    newcomer = await _seed_attacker(repo, "3.3.3.3", ja3="ja3-x")

    followup = await clusterer.tick(repo)

    # The existing identity must not be reported as formed again ...
    reformed = {entry["identity_uuid"] for entry in followup.identities_formed}
    assert identity_uuid not in reformed, (
        "second tick must link to the existing identity, not mint a new one"
    )
    # ... and the newcomer must show up as a link event.
    linked = {entry["observation_uuid"] for entry in followup.observations_linked}
    assert newcomer in linked
|
||||
|
||||
|
||||
# ─── identity fingerprint rollup ───────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_tick_rolls_up_fingerprint_columns_on_create(repo):
    """A newly minted identity carries deduplicated, sorted fingerprint rollups.

    ``ja3_hashes`` / ``hassh_hashes`` / ``tls_cert_sha256`` must aggregate
    the values of every member observation.
    """
    cert_one, cert_two = "ab" * 32, "cd" * 32
    await _seed_attacker(
        repo, "1.1.1.1", ja3="ja3-x", hassh="hassh-y", cert_sha256=cert_one,
    )
    await _seed_attacker(
        repo, "2.2.2.2", ja3="ja3-x", hassh="hassh-y", cert_sha256=cert_two,
    )

    outcome = await ConnectedComponentsClusterer().tick(repo)
    assert len(outcome.identities_formed) == 1
    identity_uuid = outcome.identities_formed[0]["identity_uuid"]

    by_uuid = {row["uuid"]: row for row in await repo.list_all_identities()}
    identity = by_uuid[identity_uuid]

    # Shared JA3 / HASSH collapse to one entry; the two certs are both kept.
    assert json.loads(identity["ja3_hashes"]) == ["ja3-x"]
    assert json.loads(identity["hassh_hashes"]) == ["hassh-y"]
    assert json.loads(identity["tls_cert_sha256"]) == sorted([cert_one, cert_two])
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_tick_rolls_up_fingerprints_on_link(repo):
    """Linking a new observation adds its cert SHA-256 to the identity rollup."""
    cert_one, cert_two = "ab" * 32, "cd" * 32

    await _seed_attacker(repo, "1.1.1.1", ja3="ja3-x", cert_sha256=cert_one)
    clusterer = ConnectedComponentsClusterer()
    initial = await clusterer.tick(repo)
    identity_uuid = initial.identities_formed[0]["identity_uuid"]

    # New observation, same JA3, fresh cert.
    await _seed_attacker(repo, "2.2.2.2", ja3="ja3-x", cert_sha256=cert_two)
    await clusterer.tick(repo)

    by_uuid = {row["uuid"]: row for row in await repo.list_all_identities()}
    rolled_up = json.loads(by_uuid[identity_uuid]["tls_cert_sha256"])
    assert rolled_up == sorted([cert_one, cert_two])
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_tick_leaves_columns_null_when_no_fingerprints(repo):
    """Identities built from fingerprint-less attackers keep rollup columns NULL.

    NULL (rather than "[]") distinguishes 'no signal yet' from
    'known empty'.
    """
    for ip in ("1.1.1.1", "2.2.2.2"):
        await _seed_attacker(repo, ip)

    await ConnectedComponentsClusterer().tick(repo)

    rollup_columns = ("ja3_hashes", "hassh_hashes", "tls_cert_sha256")
    for identity in await repo.list_all_identities():
        for column in rollup_columns:
            assert identity[column] is None
|
||||
|
||||
|
||||
# ─── fixture-bound assertions (in-memory) ──────────────────────────────────
|
||||
|
||||
|
||||
def _production_clusterer_predict(corpus) -> dict[str, str]:
    """Run the production ``cluster_observations`` over a corpus.

    Mirrors the reference clusterer signature (corpus → dict) so it can
    be passed to ``assert_fixture_bounds``. Pure / in-memory — it only
    reads ``corpus.attackers`` and never touches the DB.

    Returns:
        Mapping of observation id → predicted cluster id. Observations
        that are alone in their component are relabelled
        ``cc-singleton-<obs_id>``; every other observation keeps the
        label produced by ``cluster_observations``.
    """
    from collections import Counter

    observations = [from_synthetic(att) for att in corpus.attackers]
    labels = cluster_observations(observations)

    # Component sizes, so singletons can be told apart from real clusters.
    sizes = Counter(labels.values())

    # Singletons (no shared signal) get unique cluster ids so the
    # metrics see them as distinct classes.
    return {
        obs_id: f"cc-singleton-{obs_id}" if sizes[cid] == 1 else cid
        for obs_id, cid in labels.items()
    }
|
||||
|
||||
|
||||
def test_lone_wolf_passes_with_production_clusterer():
    """Fixture 3 (lone_wolf): the production clusterer meets the fixture's bounds.

    Every actor is a singleton in this fixture, so the clusterer is
    expected to keep them all separate.
    """
    from tests.clustering.fixture_harness import assert_fixture_bounds
    from tests.factories.campaign_factory import generate, load_yaml

    spec_path = FIXTURE_DIR / "lone_wolf.yaml"
    bounds_path = FIXTURE_DIR / "lone_wolf.expected.yaml"

    corpus = generate(load_yaml(spec_path), seed=0)
    assert_fixture_bounds(corpus, _production_clusterer_predict, bounds_path)
|
||||
|
||||
|
||||
def test_shared_wordlist_passes_with_production_clusterer():
    """Fixture 1 (shared_wordlist): the production clusterer meets the fixture's bounds.

    Two campaigns share only credentials and use divergent infra; with
    high-weight edges only, credential overlap is not a v1 signal, so
    they are expected to stay separate.
    """
    from tests.clustering.fixture_harness import assert_fixture_bounds
    from tests.factories.campaign_factory import generate, load_yaml

    spec_path = FIXTURE_DIR / "shared_wordlist.yaml"
    bounds_path = FIXTURE_DIR / "shared_wordlist.expected.yaml"

    corpus = generate(load_yaml(spec_path), seed=0)
    assert_fixture_bounds(corpus, _production_clusterer_predict, bounds_path)
|
||||
|
||||
|
||||
def test_paused_campaign_passes_with_production_clusterer():
    """Fixture 4 (paused_campaign): the production clusterer meets the fixture's bounds.

    One campaign is split across two operational windows by a multi-day
    silence. Both halves share JA3 + HASSH + payload + C2, so they are
    expected to fold into one identity — the silence window is
    irrelevant to clustering (time-agnostic invariant).
    """
    from tests.clustering.fixture_harness import assert_fixture_bounds
    from tests.factories.campaign_factory import generate, load_yaml

    spec_path = FIXTURE_DIR / "paused_campaign.yaml"
    bounds_path = FIXTURE_DIR / "paused_campaign.expected.yaml"

    corpus = generate(load_yaml(spec_path), seed=0)
    assert_fixture_bounds(corpus, _production_clusterer_predict, bounds_path)
|
||||
|
||||
|
||||
def test_multi_operator_keeps_distinct_identities_with_production_clusterer():
    """Fixture 5 (multi_operator), scored at identity level.

    Two operators have distinct JA3 + HASSH but share C2 + payload; the
    clusterer's fingerprint-disagreement veto is expected to keep them
    as two identities.
    """
    from tests.factories.campaign_factory import generate, load_yaml
    from tests.clustering.metrics import score

    spec = load_yaml(FIXTURE_DIR / "multi_operator.yaml")
    corpus = generate(spec, seed=0)
    predicted = _production_clusterer_predict(corpus)

    # Exactly two predicted clusters — no merge across operators whose
    # fingerprints disagree.
    distinct_clusters = set(predicted.values())
    assert len(distinct_clusters) == 2

    truth = corpus.truth_labels(level="identity")
    metrics = score(truth, predicted)

    # Perfect identity-level recovery: ARI = 1.0, homogeneity = 1.0.
    assert metrics["adjusted_rand_index"] == pytest.approx(1.0)
    assert metrics["homogeneity"] == pytest.approx(1.0)
|
||||
|
||||
|
||||
def test_cluster_observations_credentials_alone_does_not_fuse():
    """An identical credential set, with nothing else shared, must not merge two observations.

    Fixture 1's failure mode in miniature.
    """

    def _with_wordlist(obs_id: str) -> Observation:
        # Same two credential pairs for every observation built here.
        return Observation(
            observation_id=obs_id,
            credentials=frozenset({("root", "toor"), ("admin", "admin")}),
        )

    labels = cluster_observations([_with_wordlist("a"), _with_wordlist("b")])

    assert labels["a"] != labels["b"]
|
||||
|
||||
|
||||
def test_cluster_observations_asn_alone_does_not_fuse():
    """A shared ASN, with nothing else in common, must not merge two observations.

    Fixture 2's failure mode in miniature: VPN/proxy hopping fragments
    ASN within a single identity, and ASN sharing across identities is
    common, so ASN cannot drive clustering.
    """
    same_asn = [
        Observation(observation_id=obs_id, asn=64500)
        for obs_id in ("a", "b")
    ]

    labels = cluster_observations(same_asn)

    assert labels["a"] != labels["b"]
|
||||
|
||||
|
||||
def test_cluster_observations_all_weak_signals_combined_does_not_fuse():
    """Credentials + commands + ASN together still do not merge two observations.

    Only a high-tier signal drives clustering; this stacks everything a
    campaign-level F1+F2 hybrid would have and confirms singletons.
    """

    def _weak_only(obs_id: str) -> Observation:
        # Fresh containers per observation; identical weak signals throughout.
        return Observation(
            observation_id=obs_id,
            asn=64500,
            credentials=frozenset({("root", "toor"), ("admin", "admin")}),
            commands_by_phase={"discovery": ("ls", "id")},
        )

    labels = cluster_observations([_weak_only("a"), _weak_only("b")])

    assert labels["a"] != labels["b"]
|
||||
|
||||
|
||||
def test_shared_wordlist_no_false_merge_at_identity_level():
    """F1 ratchet: credential-sharing rows must not be fused at identity level.

    At identity level each row is its own truth identity, so every row
    must end up in its own predicted cluster and homogeneity must be 1.0.
    """
    from tests.factories.campaign_factory import generate, load_yaml
    from tests.clustering.metrics import score

    spec = load_yaml(FIXTURE_DIR / "shared_wordlist.yaml")
    corpus = generate(spec, seed=0)
    predicted = _production_clusterer_predict(corpus)
    metrics = score(corpus.truth_labels(level="identity"), predicted)

    # One predicted cluster per row — anything fewer is a false merge
    # driven by the credential-overlap signal.
    cluster_count = len(set(predicted.values()))
    assert cluster_count == len(corpus.attackers)
    assert metrics["homogeneity"] == pytest.approx(1.0)
|
||||
|
||||
|
||||
def test_vpn_hopping_asn_alone_would_have_fragmented_but_doesnt():
    """F2 ratchet: differing ASNs must not split the vpn_hopping fixture.

    The fixture is first sanity-checked to contain exactly 5 distinct
    ASNs; the production clusterer must nevertheless emit a single
    cluster label for the whole corpus.
    """
    from tests.factories.campaign_factory import generate, load_yaml

    spec = load_yaml(FIXTURE_DIR / "vpn_hopping.yaml")
    corpus = generate(spec, seed=0)
    pred = _production_clusterer_predict(corpus)

    distinct_asns = set()
    for attacker in corpus.attackers:
        distinct_asns.add(attacker.asn)
    assert len(distinct_asns) == 5, "fixture sanity: 5 distinct ASNs"

    # Every row shares one predicted label.
    predicted_labels = set(pred.values())
    assert len(predicted_labels) == 1
|
||||
|
||||
|
||||
def test_cluster_observations_medium_alone_does_not_fuse():
    """An identical command sequence alone must not merge two observations.

    The two observations differ only in their id and share the same
    discovery-phase commands; they are expected to get different labels.
    """

    def _commands_only(observation_id):
        # A new dict per observation, matching two separate constructor
        # calls.
        return Observation(
            observation_id=observation_id,
            commands_by_phase={"discovery": ("ls", "id", "uname")},
        )

    assignment = cluster_observations([_commands_only("a"), _commands_only("b")])
    assert assignment["a"] != assignment["b"]
|
||||
|
||||
|
||||
def _build_noise_floor_corpus():
    """Flatten ``noise_floor.yaml``'s ``include_fixtures`` list into one corpus.

    Each included fixture is either corpus-shaped (has a ``corpus`` key,
    contributing its campaigns and its noise scanner count) or
    campaign-shaped (contributing its single ``campaign`` entry). The
    manifest's ``extra_noise_scanners`` is added on top of the inherited
    scanner count. The combined spec is generated with ``seed=0``.
    """
    import yaml as _yaml
    from typing import Any
    from tests.factories.campaign_factory import generate, load_yaml

    manifest_text = (FIXTURE_DIR / "noise_floor.yaml").read_text(encoding="utf-8")
    manifest = _yaml.safe_load(manifest_text)

    merged_campaigns: list[dict[str, Any]] = []
    scanner_total = 0
    for included_name in manifest["include_fixtures"]:
        included = load_yaml(FIXTURE_DIR / included_name)
        if "corpus" not in included:
            # Campaign-shaped fixture: wrap its single campaign.
            merged_campaigns.append({"campaign": included["campaign"]})
            continue
        merged_campaigns.extend(included["corpus"].get("campaigns", []))
        # ``noise`` may be absent or null, hence the ``or {}``.
        noise_block = included["corpus"].get("noise") or {}
        scanner_total += int(noise_block.get("scanner_count", 0))

    scanner_total += int(manifest.get("extra_noise_scanners", 0))
    combined_spec = {
        "corpus": {
            "campaigns": merged_campaigns,
            "noise": {"scanner_count": scanner_total},
        }
    }
    return generate(combined_spec, seed=0)
|
||||
|
||||
|
||||
def test_noise_floor_singleton_recall_holds_with_production_clusterer():
    """Fixture 6 ratchet — noise scanners stay out of real campaigns.

    The composite noise-floor corpus is clustered with the production
    clusterer and scored against *campaign*-level truth labels; the
    ``singleton_recall`` metric must be at least 0.95.

    The identity-shaped half of this fixture is covered separately by
    ``test_noise_floor_intra_campaign_recovery_with_production_clusterer``.
    """
    from tests.clustering.metrics import score

    corpus = _build_noise_floor_corpus()
    pred = _production_clusterer_predict(corpus)
    campaign_truth = corpus.truth_labels(level="campaign")
    metrics = score(campaign_truth, pred)
    # The full metrics dict is the failure message, to aid debugging.
    assert metrics["singleton_recall"] >= 0.95, metrics
|
||||
|
||||
|
||||
def test_noise_floor_intra_campaign_recovery_with_production_clusterer():
    """Real campaigns must still resolve inside the composite corpus.

    Two checks on the noise-floor corpus:

    * the 5 rows whose truth campaign is ``vpn-hopping-001`` all receive
      the same predicted cluster;
    * rows of ``shared-wordlist-A`` and ``shared-wordlist-B`` never share
      a predicted cluster.
    """
    corpus = _build_noise_floor_corpus()
    pred = _production_clusterer_predict(corpus)

    def _attacker_ids_in(campaign_id):
        """Attacker ids whose truth campaign is *campaign_id*."""
        return [
            row.attacker_id
            for row in corpus.attackers
            if row.truth_campaign_id == campaign_id
        ]

    def _predicted_clusters(attacker_ids):
        """Distinct predicted cluster ids for the given attackers."""
        return {pred[attacker_id] for attacker_id in attacker_ids}

    # vpn_hopping: all 5 rotation rows must share one predicted cluster.
    vpn_obs = _attacker_ids_in("vpn-hopping-001")
    assert len(vpn_obs) == 5
    vpn_clusters = _predicted_clusters(vpn_obs)
    assert len(vpn_clusters) == 1, (
        "vpn_hopping must consolidate to one cluster across rotations"
    )

    # shared_wordlist A and B: both must be present in the corpus, and
    # their predicted clusters must not intersect.
    sw_a = _attacker_ids_in("shared-wordlist-A")
    sw_b = _attacker_ids_in("shared-wordlist-B")
    assert sw_a and sw_b
    sw_a_clusters = _predicted_clusters(sw_a)
    sw_b_clusters = _predicted_clusters(sw_b)
    assert sw_a_clusters.isdisjoint(sw_b_clusters), (
        "shared_wordlist A and B must not share a cluster"
    )
|
||||
|
||||
|
||||
def test_slow_burn_passes_with_production_clusterer():
    """Fixture 7 (slow_burn): the whole corpus must fold into one cluster.

    The production clusterer first has to satisfy the bounds in
    ``slow_burn.expected.yaml``; on top of that it must emit exactly one
    cluster label and reach completeness 1.0.
    """
    from tests.clustering.fixture_harness import assert_fixture_bounds
    from tests.factories.campaign_factory import generate, load_yaml

    spec = load_yaml(FIXTURE_DIR / "slow_burn.yaml")
    corpus = generate(spec, seed=0)
    bounds_path = FIXTURE_DIR / "slow_burn.expected.yaml"
    metrics = assert_fixture_bounds(
        corpus, _production_clusterer_predict, bounds_path,
    )

    # The harness only returns metrics, so predict again to inspect the
    # raw labels.
    pred = _production_clusterer_predict(corpus)
    predicted_labels = set(pred.values())
    assert len(predicted_labels) == 1
    assert metrics["completeness"] == pytest.approx(1.0)
|
||||
|
||||
|
||||
def test_slow_burn_time_shift_invariance():
    """Shifting every timestamp by +90 days must not change the clustering.

    The slow_burn corpus is clustered, then every attacker's
    ``first_seen`` / ``last_seen`` and every session's ``started_at`` is
    moved forward by the same delta (in place), and the corpus is
    clustered again. The two predictions must group the same ids
    together.
    """
    from datetime import timedelta
    from tests.factories.campaign_factory import generate, load_yaml

    def _canonical(pred: dict[str, str]) -> set[frozenset[str]]:
        # Cluster ids are opaque labels; compare the membership groups.
        members_by_cluster: dict[str, set[str]] = {}
        for observation_id, cluster_id in pred.items():
            if cluster_id not in members_by_cluster:
                members_by_cluster[cluster_id] = set()
            members_by_cluster[cluster_id].add(observation_id)
        return set(map(frozenset, members_by_cluster.values()))

    spec = load_yaml(FIXTURE_DIR / "slow_burn.yaml")
    corpus = generate(spec, seed=0)
    baseline = _production_clusterer_predict(corpus)

    # Uniform +90 day shift applied to the corpus in place.
    shift = timedelta(days=90)
    for attacker in corpus.attackers:
        attacker.first_seen = attacker.first_seen + shift
        attacker.last_seen = attacker.last_seen + shift
        for session in attacker.sessions:
            session.started_at = session.started_at + shift

    shifted = _production_clusterer_predict(corpus)

    assert _canonical(baseline) == _canonical(shifted)
|
||||
|
||||
|
||||
def test_vpn_hopping_passes_at_identity_level_with_production_clusterer():
    """Fixture 2: vpn_hopping scored against identity-level truth.

    The production clusterer must satisfy ``vpn_hopping.expected.yaml``
    at ``truth_level="identity"`` and additionally reach a perfect
    adjusted Rand index and completeness.
    """
    from tests.clustering.fixture_harness import assert_fixture_bounds
    from tests.factories.campaign_factory import generate, load_yaml

    spec = load_yaml(FIXTURE_DIR / "vpn_hopping.yaml")
    corpus = generate(spec, seed=0)
    bounds_path = FIXTURE_DIR / "vpn_hopping.expected.yaml"
    metrics = assert_fixture_bounds(
        corpus,
        _production_clusterer_predict,
        bounds_path,
        truth_level="identity",
    )
    perfect = pytest.approx(1.0)
    assert metrics["adjusted_rand_index"] == perfect
    assert metrics["completeness"] == perfect
|
||||
278
tests/clustering/test_fixtures_campaign_clusterer.py
Normal file
278
tests/clustering/test_fixtures_campaign_clusterer.py
Normal file
@@ -0,0 +1,278 @@
|
||||
"""Run the production campaign clusterer through all 7 fixtures.
|
||||
|
||||
The 7 fixtures' YAML bounds were tuned for *reference* clusterers
|
||||
(``c2_callback_clusterer``, ``composite_signals_clusterer``, etc.).
|
||||
The production campaign clusterer (``ConnectedComponentsCampaignClusterer``)
|
||||
is the system under test now; this module asserts it meets every
|
||||
existing bound, plus a few stricter per-fixture invariants where the
|
||||
algorithm should — by design — score perfectly.
|
||||
|
||||
The pure path is what's exercised here: ``cluster_identities``
|
||||
operating over ``IdentityFeatures`` projected via
|
||||
``from_synthetic_identity``. Each ``SyntheticAttacker`` is treated as
|
||||
one identity (identity layer is below; the campaign clusterer reads
|
||||
identities). End-to-end DB-backed validation is in
|
||||
``test_campaign_worker.py``.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
|
||||
from decnet.clustering.campaign.impl.connected_components import (
|
||||
cluster_identities,
|
||||
)
|
||||
from decnet.clustering.campaign.impl.similarity import (
|
||||
IdentityFeatures,
|
||||
from_synthetic_identity,
|
||||
)
|
||||
from decnet.clustering.impl.connected_components import cluster_observations
|
||||
from decnet.clustering.impl.similarity import from_synthetic
|
||||
from tests.clustering.fixture_harness import assert_fixture_bounds
|
||||
from tests.clustering.metrics import score
|
||||
from tests.factories.campaign_factory import generate, load_yaml
|
||||
|
||||
FIXTURE_DIR = Path(__file__).parent.parent / "fixtures" / "campaigns"
|
||||
|
||||
|
||||
def _load_corpus(yaml_name: str) -> Any:
    """Generate the corpus for a fixture file under ``FIXTURE_DIR``.

    A plain fixture is passed straight to ``generate`` with ``seed=0``.
    A composite fixture — one whose YAML has an ``include_fixtures`` key —
    is first expanded: each included fixture contributes its campaigns
    (and, when corpus-shaped, its noise scanner count), and the
    composite's ``extra_noise_scanners`` is added on top.
    """
    path = FIXTURE_DIR / yaml_name
    raw = yaml.safe_load(path.read_text(encoding="utf-8"))
    if "include_fixtures" not in raw:
        return generate(load_yaml(path), seed=0)

    # Composite format: same expansion as the noise_floor fixture test.
    merged_campaigns: list[dict[str, Any]] = []
    scanner_total = 0
    for included_name in raw["include_fixtures"]:
        included = load_yaml(FIXTURE_DIR / included_name)
        if "corpus" not in included:
            merged_campaigns.append({"campaign": included["campaign"]})
            continue
        merged_campaigns.extend(included["corpus"].get("campaigns", []))
        # ``noise`` may be absent or null, hence the ``or {}``.
        noise_block = included["corpus"].get("noise") or {}
        scanner_total += int(noise_block.get("scanner_count", 0))

    scanner_total += int(raw.get("extra_noise_scanners", 0))
    spec: Any = {
        "corpus": {
            "campaigns": merged_campaigns,
            "noise": {"scanner_count": scanner_total},
        }
    }
    return generate(spec, seed=0)
|
||||
|
||||
|
||||
def production_campaign_clusterer(corpus) -> dict[str, str]:
    """Predict-fn adapter: identity clustering followed by campaign clustering.

    Step 1 clusters the per-attacker observations into identities.
    Step 2 merges each identity's members into one ``IdentityFeatures``
    and clusters those into campaigns. The result maps every
    ``attacker_id`` to the campaign label of the identity it landed in.
    """
    # Layer 1 — observations to identity labels.
    observations = [from_synthetic(attacker) for attacker in corpus.attackers]
    identity_of = cluster_observations(observations)

    # Bucket attackers by identity label, preserving corpus order.
    members_by_identity: dict[str, list] = {}
    for attacker in corpus.attackers:
        identity_id = identity_of[attacker.attacker_id]
        if identity_id not in members_by_identity:
            members_by_identity[identity_id] = []
        members_by_identity[identity_id].append(attacker)

    # Layer 2 — one merged feature record per identity, then campaigns.
    identity_features: list[IdentityFeatures] = [
        _merge_features(identity_id, members)
        for identity_id, members in members_by_identity.items()
    ]
    campaign_of = cluster_identities(identity_features)

    # attacker -> identity -> campaign.
    prediction = {}
    for attacker in corpus.attackers:
        prediction[attacker.attacker_id] = campaign_of[
            identity_of[attacker.attacker_id]
        ]
    return prediction
|
||||
|
||||
|
||||
def _merge_features(identity_uuid: str, members) -> IdentityFeatures:
    """Fold the features of several attackers into one ``IdentityFeatures``.

    Every member is projected with ``from_synthetic_identity`` and the
    projections are combined:

    * set-valued fields are unioned;
    * session windows are concatenated in member order;
    * per decky, the smallest first-seen and largest last-seen value win,
      and the matching phase is taken from the same projection (ties keep
      the earlier member's entry);
    * command tuples are concatenated per key, in member order.
    """
    projections = [
        from_synthetic_identity(member, identity_uuid=identity_uuid)
        for member in members
    ]

    asns: set[int] = set()
    hashes: set[str] = set()
    endpoints: set[str] = set()
    deckies: set[str] = set()
    windows: list[tuple[float, float]] = []
    earliest_seen: dict[str, float] = {}
    earliest_phase: dict[str, str] = {}
    latest_seen: dict[str, float] = {}
    latest_phase: dict[str, str] = {}
    commands: dict[tuple[str, str], list[str]] = {}

    for feat in projections:
        asns |= feat.asn_cohort
        hashes |= feat.payload_hashes
        endpoints |= feat.c2_endpoints
        deckies |= feat.decky_set
        windows.extend(feat.session_windows)

        for decky, seen_at in feat.first_seen_per_decky.items():
            known = earliest_seen.get(decky)
            if known is not None and not (seen_at < known):
                continue  # an earlier-or-equal sighting is already recorded
            earliest_seen[decky] = seen_at
            earliest_phase[decky] = feat.first_phase_per_decky.get(decky, "")

        for decky, seen_at in feat.last_seen_per_decky.items():
            known = latest_seen.get(decky)
            if known is not None and not (seen_at > known):
                continue  # a later-or-equal sighting is already recorded
            latest_seen[decky] = seen_at
            latest_phase[decky] = feat.last_phase_per_decky.get(decky, "")

        for key, cmds in feat.commands_by_phase_on_decky.items():
            bucket = commands.get(key)
            if bucket is None:
                bucket = commands[key] = []
            bucket.extend(cmds)

    frozen_commands = {key: tuple(cmds) for key, cmds in commands.items()}
    return IdentityFeatures(
        identity_uuid=identity_uuid,
        asn_cohort=frozenset(asns),
        payload_hashes=frozenset(hashes),
        c2_endpoints=frozenset(endpoints),
        decky_set=frozenset(deckies),
        session_windows=tuple(windows),
        last_phase_per_decky=latest_phase,
        first_phase_per_decky=earliest_phase,
        last_seen_per_decky=latest_seen,
        first_seen_per_decky=earliest_seen,
        commands_by_phase_on_decky=frozen_commands,
    )
|
||||
|
||||
|
||||
# ─── Per-fixture bound assertions ───────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("yaml_name", "expected_name", "truth_level"),
    [
        ("lone_wolf.yaml", "lone_wolf.expected.yaml", "campaign"),
        ("shared_wordlist.yaml", "shared_wordlist.expected.yaml", "campaign"),
        ("vpn_hopping.yaml", "vpn_hopping.expected.yaml", "campaign"),
        ("paused_campaign.yaml", "paused_campaign.expected.yaml", "campaign"),
        ("multi_operator.yaml", "multi_operator.expected.yaml", "campaign"),
        ("noise_floor.yaml", "noise_floor.expected.yaml", "campaign"),
        ("slow_burn.yaml", "slow_burn.expected.yaml", "campaign"),
    ],
)
def test_production_campaign_clusterer_passes_fixture_bounds(
    yaml_name: str,
    expected_name: str,
    truth_level: str,
) -> None:
    """The chained production clusterer meets each fixture's YAML bounds.

    Each case loads one fixture corpus and hands it, together with the
    matching ``*.expected.yaml`` bound file, to ``assert_fixture_bounds``
    at the given truth level.
    """
    corpus = _load_corpus(yaml_name)
    bounds_path = FIXTURE_DIR / expected_name
    assert_fixture_bounds(
        corpus,
        production_campaign_clusterer,
        bounds_path,
        truth_level=truth_level,
    )
|
||||
|
||||
|
||||
# ─── Per-fixture sharpness assertions (production clusterer specifics) ─────
|
||||
#
|
||||
# These tighten the YAML bounds for fixtures where the production
|
||||
# clusterer is expected to score *perfectly*. They live as Python
|
||||
# assertions (not YAML) so they only gate the production clusterer —
|
||||
# the YAML bounds stay loose for the reference-clusterer tests in the
|
||||
# per-fixture files. Ratcheting these up over time is safe; the YAML
|
||||
# bounds remain the floor that *every* tested clusterer must beat.
|
||||
|
||||
|
||||
def test_f3_lone_wolf_perfect_score() -> None:
    """lone_wolf: singleton recall and ARI must both be exactly 1.0."""
    corpus = _load_corpus("lone_wolf.yaml")
    pred = production_campaign_clusterer(corpus)
    campaign_truth = corpus.truth_labels(level="campaign")
    metrics = score(campaign_truth, pred)
    perfect = pytest.approx(1.0)
    assert metrics["singleton_recall"] == perfect
    assert metrics["adjusted_rand_index"] == perfect
|
||||
|
||||
|
||||
def test_f1_shared_wordlist_no_false_merge() -> None:
    """Two campaigns burning the same wordlist must NOT fuse.

    Clusters the ``shared_wordlist`` fixture with the chained production
    clusterer and checks that no predicted cluster contains attackers
    from more than one truth campaign. A truth campaign being split over
    several predicted clusters is *not* rejected here; only cross-campaign
    merges are.
    """
    corpus = _load_corpus("shared_wordlist.yaml")
    pred = production_campaign_clusterer(corpus)
    truth = corpus.truth_labels(level="campaign")

    # Prediction and truth must label exactly the same attackers;
    # otherwise the per-cluster lookup below would hit a missing id.
    assert pred.keys() == truth.keys(), (
        "shared_wordlist: prediction and truth cover different attacker ids"
    )

    # For every predicted cluster, collect the truth campaigns of its
    # members. More than one campaign in a cluster is a false merge.
    pred_to_truth: dict[str, set[str]] = {}
    for aid, p in pred.items():
        pred_to_truth.setdefault(p, set()).add(truth[aid])
    assert all(len(s) == 1 for s in pred_to_truth.values()), (
        f"shared_wordlist: predicted cluster spans multiple campaigns: "
        f"{pred_to_truth}"
    )
|
||||
|
||||
|
||||
def test_f5_multi_operator_folds_to_one_campaign() -> None:
    """multi_operator: every attacker must land in one predicted campaign.

    Checks both the raw label count (exactly one cluster id) and a
    perfect adjusted Rand index against campaign-level truth.
    """
    corpus = _load_corpus("multi_operator.yaml")
    pred = production_campaign_clusterer(corpus)

    cluster_ids = {label for label in pred.values()}
    assert len(cluster_ids) == 1, (
        f"multi_operator: expected 1 campaign, got {len(cluster_ids)} — "
        f"predictions: {pred}"
    )

    campaign_truth = corpus.truth_labels(level="campaign")
    metrics = score(campaign_truth, pred)
    assert metrics["adjusted_rand_index"] == pytest.approx(1.0)
|
||||
|
||||
|
||||
def test_f7_slow_burn_time_shift_invariance() -> None:
    """A uniform +90 day shift must leave the predicted partition unchanged.

    The slow_burn corpus is clustered, every attacker's ``first_seen`` /
    ``last_seen`` and every session's ``started_at`` is moved forward by
    the same delta (in place), and the corpus is clustered again. The two
    predictions are compared as partitions, since cluster id strings are
    arbitrary.
    """
    from datetime import timedelta

    corpus = _load_corpus("slow_burn.yaml")
    base_pred = production_campaign_clusterer(corpus)

    shift = timedelta(days=90)
    for attacker in corpus.attackers:
        attacker.first_seen += shift
        attacker.last_seen += shift
        for session in attacker.sessions:
            session.started_at += shift

    shifted_pred = production_campaign_clusterer(corpus)

    base_partition = _partition(base_pred)
    shifted_partition = _partition(shifted_pred)
    assert base_partition == shifted_partition, (
        f"slow_burn: +90d shift changed the predicted partition\n"
        f"base: {base_partition}\n"
        f"shifted: {shifted_partition}"
    )
|
||||
|
||||
|
||||
def _partition(labels: dict[str, str]) -> set[frozenset[str]]:
|
||||
"""Return the cluster partition (set of frozensets of member ids).
|
||||
|
||||
Cluster id strings are arbitrary; the equivalence we care about is
|
||||
"which ids ended up in the same cluster?".
|
||||
"""
|
||||
by_cluster: dict[str, set[str]] = {}
|
||||
for member, cluster_id in labels.items():
|
||||
by_cluster.setdefault(cluster_id, set()).add(member)
|
||||
return {frozenset(s) for s in by_cluster.values()}
|
||||
74
tests/clustering/test_lone_wolf_fixture.py
Normal file
74
tests/clustering/test_lone_wolf_fixture.py
Normal file
@@ -0,0 +1,74 @@
|
||||
"""
|
||||
End-to-end pipeline test for fixture 3 (lone_wolf).
|
||||
|
||||
Loads the YAML spec, runs the synthetic generator, applies the
|
||||
identity-clusterer placeholder (each attacker → its own cluster), and
|
||||
scores against the expected bounds. This is the simplest of the seven
|
||||
fixtures and is deliberately the first one wired up — its ground truth
|
||||
is all singletons, so an identity clusterer trivially passes, which
|
||||
proves the DSL → factory → metrics pipeline works before any real
|
||||
algorithm is built.
|
||||
|
||||
Once the connected-components clusterer (CAMPAIGN_CLUSTERING.md §4)
|
||||
lands, the same fixture must continue to pass.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from tests.clustering.fixture_harness import (
|
||||
assert_fixture_bounds,
|
||||
identity_clusterer,
|
||||
)
|
||||
from tests.clustering.metrics import score
|
||||
from tests.factories.campaign_factory import generate, load_yaml
|
||||
|
||||
FIXTURE_DIR = Path(__file__).parent.parent / "fixtures" / "campaigns"
|
||||
|
||||
|
||||
def test_lone_wolf_pipeline_passes_bounds() -> None:
    """The placeholder identity clusterer meets lone_wolf's YAML bounds."""
    corpus = generate(load_yaml(FIXTURE_DIR / "lone_wolf.yaml"), seed=0)
    bounds_path = FIXTURE_DIR / "lone_wolf.expected.yaml"
    assert_fixture_bounds(corpus, identity_clusterer, bounds_path)
|
||||
|
||||
|
||||
def test_lone_wolf_corpus_shape() -> None:
    """Sanity: the generated corpus has 9 attackers, 9 sessions and
    9 distinct truth campaign ids."""
    corpus = generate(load_yaml(FIXTURE_DIR / "lone_wolf.yaml"), seed=0)
    assert len(corpus.attackers) == 9
    assert len(corpus.sessions) == 9
    distinct_campaigns = set()
    for attacker in corpus.attackers:
        distinct_campaigns.add(attacker.truth_campaign_id)
    assert len(distinct_campaigns) == 9
|
||||
|
||||
|
||||
def test_identity_clusterer_fails_on_a_real_campaign() -> None:
    """Harness sanity check, not a test of any real clusterer.

    An inline two-actor campaign is generated and fed to the placeholder
    identity clusterer. The expectation is completeness below 1.0 with
    homogeneity still at 1.0 — i.e. the metrics notice a campaign that
    was split rather than merged.
    """
    actors = [
        {"id": "a-1", "asn": 14061},
        {"id": "a-2", "asn": 14061},
    ]
    phases = [
        {"name": "delivery", "actor": "a-1"},
        {"name": "discovery", "actor": "a-2"},
    ]
    spec = {
        "campaign": {
            "id": "c-real",
            "actors": actors,
            "phases": phases,
            "duration_days": 1,
        }
    }
    corpus = generate(spec, seed=0)
    pred = identity_clusterer(corpus)
    truth = corpus.truth_labels()
    metrics = score(truth, pred)
    assert metrics["completeness"] < 1.0
    assert metrics["homogeneity"] == pytest.approx(1.0)
|
||||
76
tests/clustering/test_metrics.py
Normal file
76
tests/clustering/test_metrics.py
Normal file
@@ -0,0 +1,76 @@
|
||||
"""Sanity tests for the clustering metric harness."""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from tests.clustering.metrics import (
|
||||
adjusted_rand_index,
|
||||
completeness,
|
||||
homogeneity,
|
||||
score,
|
||||
singleton_recall,
|
||||
)
|
||||
|
||||
|
||||
def test_perfect_agreement_scores_one() -> None:
    """An identical partition under different label names scores 1.0
    on all four metrics."""
    truth = {"a": "C1", "b": "C1", "c": "C2", "d": "C2"}
    # Same grouping, renamed labels: cluster names carry no meaning.
    pred = {"a": "X", "b": "X", "c": "Y", "d": "Y"}
    result = score(truth, pred)
    perfect = pytest.approx(1.0)
    assert result["adjusted_rand_index"] == perfect
    assert result["homogeneity"] == perfect
    assert result["completeness"] == perfect
    assert result["singleton_recall"] == perfect
|
||||
|
||||
|
||||
def test_all_singletons_perfect() -> None:
    """All-singleton truth matched by all-singleton prediction scores 1.0
    on singleton recall and ARI."""
    truth = {"a": "A", "b": "B", "c": "C"}
    pred = {"a": "1", "b": "2", "c": "3"}
    result = score(truth, pred)
    perfect = pytest.approx(1.0)
    assert result["singleton_recall"] == perfect
    assert result["adjusted_rand_index"] == perfect
|
||||
|
||||
|
||||
def test_false_merge_drops_homogeneity() -> None:
    """Merging two distinct campaigns zeroes homogeneity, not completeness."""
    truth = {"a": "C1", "b": "C2"}
    # Both items get the same predicted label: a false merge.
    merged = {"a": "X", "b": "X"}
    assert homogeneity(truth, merged) == pytest.approx(0.0)
    # Each truth class still sits entirely inside one cluster.
    assert completeness(truth, merged) == pytest.approx(1.0)
|
||||
|
||||
|
||||
def test_false_split_drops_completeness() -> None:
    """Splitting one campaign in two zeroes completeness, not homogeneity."""
    truth = {"a": "C1", "b": "C1"}
    # The single truth campaign is spread over two predicted labels.
    split = {"a": "X", "b": "Y"}
    assert completeness(truth, split) == pytest.approx(0.0)
    assert homogeneity(truth, split) == pytest.approx(1.0)
|
||||
|
||||
|
||||
def test_singleton_recall_penalises_noise_absorption() -> None:
    """Singleton recall is 0.0 when wolves are absorbed, 1.0 when kept apart."""
    # Three lone wolves plus one two-member campaign.
    truth = {"w1": "wolf1", "w2": "wolf2", "w3": "wolf3", "c1": "C", "c2": "C"}

    # Everything collapsed into one big cluster.
    absorbed = {item: "BIG" for item in truth}
    assert singleton_recall(truth, absorbed) == pytest.approx(0.0)

    # Wolves kept as singletons; the campaign members share a label.
    kept_apart = {"w1": "1", "w2": "2", "w3": "3", "c1": "C", "c2": "C"}
    assert singleton_recall(truth, kept_apart) == pytest.approx(1.0)
|
||||
|
||||
|
||||
def test_mismatched_item_sets_raises() -> None:
    """Truth and prediction over different item ids raise ``ValueError``."""
    truth = {"a": "X"}
    pred = {"b": "Y"}
    with pytest.raises(ValueError):
        adjusted_rand_index(truth, pred)
|
||||
|
||||
|
||||
def test_random_labels_low_ari() -> None:
    """A prediction built without reference to truth must score an ARI
    well below 1."""
    truth = {}
    pred = {}
    for n in range(20):
        item = f"i{n}"
        # Truth: five consecutive groups of four items each.
        truth[item] = f"C{n // 4}"
        # Prediction: five buckets assigned by an arithmetic pattern that
        # does not look at the truth groups.
        pred[item] = f"X{(n * 7) % 5}"
    ari = adjusted_rand_index(truth, pred)
    # Deliberately loose: "much closer to 0 than to 1".
    assert ari < 0.3
|
||||
134
tests/clustering/test_multi_operator_fixture.py
Normal file
134
tests/clustering/test_multi_operator_fixture.py
Normal file
@@ -0,0 +1,134 @@
|
||||
"""
|
||||
End-to-end pipeline test for fixture 5 (multi_operator).
|
||||
|
||||
One campaign, two operators with distinct UKC roles, distinct
|
||||
tooling (different JA3 + HASSH), distinct ASNs and IPs, on
|
||||
opposite shift schedules. What ties them is shared C2 callback +
|
||||
shared stage-1 payload hash — the planned similarity graph's
|
||||
"payload simhash + C2 endpoint match" arms are what should resolve
|
||||
them as one campaign.
|
||||
|
||||
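Three tests cover this (a fourth, `test_fingerprint_clusterer_cannot_resolve_this_fixture`,
is a harness sanity check documented in its own docstring):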
|
||||
|
||||
1. `test_multi_operator_corpus_shape` — sanity: two attackers, one
|
||||
campaign, distinct fingerprints, shared C2 callback present in
|
||||
both rows' sessions, distinct shift hours.
|
||||
|
||||
2. `test_multi_operator_pipeline_passes_bounds` — runs
|
||||
`c2_callback_clusterer` (the appropriate pass-clusterer for
|
||||
this fixture, since fingerprint_clusterer would split the two
|
||||
distinct operators). Folds both rows into one cluster via the
|
||||
shared C2 endpoint.
|
||||
|
||||
3. `test_shift_clusterer_fragments_campaign` — runs the deliberately
|
||||
bad `shift_clusterer`. Actor A on night shift and Actor B on day
|
||||
shift split into two clusters → completeness collapses → the
|
||||
bound floor on completeness rejects the bad clusterer. This is
|
||||
the canonical proof that operational-schedule overlap is NOT a
|
||||
campaign signal.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from tests.clustering.fixture_harness import (
|
||||
assert_fixture_bounds,
|
||||
c2_callback_clusterer,
|
||||
fingerprint_clusterer,
|
||||
shift_clusterer,
|
||||
)
|
||||
from tests.clustering.metrics import score
|
||||
from tests.factories.campaign_factory import generate, load_yaml
|
||||
|
||||
FIXTURE_DIR = Path(__file__).parent.parent / "fixtures" / "campaigns"
|
||||
FIXTURE_YAML = FIXTURE_DIR / "multi_operator.yaml"
|
||||
EXPECTED_YAML = FIXTURE_DIR / "multi_operator.expected.yaml"
|
||||
|
||||
|
||||
def test_multi_operator_corpus_shape() -> None:
    """Sanity-check the generated multi_operator corpus.

    Expects two attackers in the single truth campaign
    ``multi-operator-001``, two distinct JA3 and HASSH values, the C2
    callback ``c2.shared-op.example`` present in both actors' sessions,
    and session start hours confined to each actor's shift window.
    """
    corpus = generate(load_yaml(FIXTURE_YAML), seed=0)
    assert len(corpus.attackers) == 2
    campaign_ids = {attacker.truth_campaign_id for attacker in corpus.attackers}
    assert campaign_ids == {"multi-operator-001"}

    # Different tooling per operator: both fingerprints must differ.
    distinct_ja3 = {attacker.ja3 for attacker in corpus.attackers}
    distinct_hassh = {attacker.hassh for attacker in corpus.attackers}
    assert len(distinct_ja3) == 2
    assert len(distinct_hassh) == 2

    attacker_by_actor = {}
    for attacker in corpus.attackers:
        attacker_by_actor[attacker.truth_actor_id] = attacker
    broker = attacker_by_actor["ops-broker-night"]
    postex = attacker_by_actor["ops-postex-day"]

    def _callbacks(attacker):
        """Non-empty C2 callbacks seen across the attacker's sessions."""
        return {s.c2_callback for s in attacker.sessions if s.c2_callback}

    def _start_hours(attacker):
        """Hours of day at which the attacker's sessions started."""
        return {s.started_at.hour for s in attacker.sessions}

    # The shared C2 callback appears on both operators.
    broker_c2s = _callbacks(broker)
    postex_c2s = _callbacks(postex)
    assert "c2.shared-op.example" in broker_c2s
    assert "c2.shared-op.example" in postex_c2s

    # Disjoint shift windows — the adversarial shift test relies on this.
    broker_hours = _start_hours(broker)
    postex_hours = _start_hours(postex)
    assert broker_hours <= {22, 23, 0, 1, 2, 3}
    assert postex_hours <= {9, 10, 11, 12, 13}
|
||||
|
||||
|
||||
def test_multi_operator_pipeline_passes_bounds() -> None:
    """``c2_callback_clusterer`` meets the fixture bounds with one cluster.

    Beyond the YAML bounds, the prediction must contain a single cluster
    label and the adjusted Rand index must be 1.0.
    """
    corpus = generate(load_yaml(FIXTURE_YAML), seed=0)
    metrics = assert_fixture_bounds(corpus, c2_callback_clusterer, EXPECTED_YAML)

    # The harness only returns metrics, so predict again for the labels.
    pred = c2_callback_clusterer(corpus)
    predicted_labels = set(pred.values())
    assert len(predicted_labels) == 1, (
        "c2_callback_clusterer should fold both operators into one cluster"
    )
    assert metrics["adjusted_rand_index"] == pytest.approx(1.0)
|
||||
|
||||
|
||||
def test_fingerprint_clusterer_cannot_resolve_this_fixture() -> None:
    """Harness sanity check: fingerprints alone split this campaign.

    ``fingerprint_clusterer`` is expected to emit two clusters for the
    single truth campaign, driving campaign-level completeness to 0.0.
    """
    corpus = generate(load_yaml(FIXTURE_YAML), seed=0)
    pred = fingerprint_clusterer(corpus)
    predicted_labels = set(pred.values())
    assert len(predicted_labels) == 2

    campaign_truth = corpus.truth_labels(level="campaign")
    metrics = score(campaign_truth, pred)
    assert metrics["completeness"] == pytest.approx(0.0)
|
||||
|
||||
|
||||
def test_shift_clusterer_fragments_campaign() -> None:
    """
    Adversarial check — the reason this fixture exists.

    Bucketing attackers by shift puts the two operators into separate
    'night' and 'day' clusters (2 predicted clusters) while the truth
    is a single campaign, so completeness collapses.

    If `shift_clusterer` ever satisfies the bounds below (i.e. this
    test starts failing), the fixture has lost its discrimination
    power.
    """
    corpus = generate(load_yaml(FIXTURE_YAML), seed=0)
    pred = shift_clusterer(corpus)

    buckets = set(pred.values())
    assert buckets == {"shift-night", "shift-day"}, (
        f"expected one night cluster + one day cluster, got {buckets}"
    )

    truth = corpus.truth_labels(level="campaign")
    metrics = score(truth, pred)
    assert metrics["completeness"] == pytest.approx(0.0)

    # Metric floors; the bad clusterer has to miss at least the
    # completeness floor.
    floors = {
        "adjusted_rand_index": 0.85,
        "homogeneity": 0.90,
        "completeness": 0.80,
        "singleton_recall": 0.95,
    }
    breaches = [name for name, floor in floors.items() if metrics[name] < floor]
    assert "completeness" in breaches, (
        f"fixture failed to catch the bad clusterer; observed metrics: {metrics}"
    )
|
||||
167
tests/clustering/test_noise_floor_fixture.py
Normal file
167
tests/clustering/test_noise_floor_fixture.py
Normal file
@@ -0,0 +1,167 @@
|
||||
"""
|
||||
End-to-end pipeline test for fixture 6 (noise_floor).
|
||||
|
||||
Composite corpus: bundles all five prior fixtures' campaigns + 10
|
||||
Delivery-only noise scanners on top of lone_wolf's 8 inherited
|
||||
ones. The fixture exists to catch cross-corpus interference —
|
||||
signal collisions, factory ID re-use, clusterer ambiguity that
|
||||
shows up only when multiple campaigns are scored together. Each
|
||||
constituent fixture already ships its own in-fixture adversarial
|
||||
test; fixture 6 covers a different failure class.
|
||||
|
||||
The composition is declared in `noise_floor.yaml` via an
|
||||
``include_fixtures`` block (a fixture-6-specific format). The
|
||||
loader in this test file expands it into a full
|
||||
``corpus.campaigns`` spec at runtime, so the factory itself stays
|
||||
unaware of the include mechanism.
|
||||
|
||||
Three tests cover this:
|
||||
|
||||
1. `test_noise_floor_corpus_integrity` — every constituent
|
||||
fixture's campaigns + actors are present in the merged corpus
|
||||
with their truth labels intact, and the 10 extra noise scanners
|
||||
are present alongside lone_wolf's 8 (truth-singletons all).
|
||||
|
||||
2. `test_noise_floor_pipeline_passes_bounds` — runs
|
||||
`composite_signals_clusterer` against the merged corpus.
|
||||
Approximates the planned similarity graph well enough that
|
||||
every campaign resolves and every singleton stays singleton.
|
||||
Trips the bound floors if any cross-fixture interference creeps
|
||||
in (signal collisions across fixtures' JA3/HASSH/C2 strings).
|
||||
|
||||
3. `test_noise_floor_singleton_recall_holds` — explicit assertion
|
||||
that every truth-singleton (the lone wolf, the 8 inherited noise
|
||||
scanners, the 10 extra noise scanners, and the 2 single-actor shared_wordlist campaigns — 21 total) ends up in a
|
||||
singleton predicted cluster. Singleton recall is the load-
|
||||
bearing metric for this fixture.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
|
||||
from tests.clustering.fixture_harness import (
|
||||
assert_fixture_bounds,
|
||||
composite_signals_clusterer,
|
||||
)
|
||||
from tests.clustering.metrics import score
|
||||
from tests.factories.campaign_factory import generate, load_yaml
|
||||
|
||||
FIXTURE_DIR = Path(__file__).parent.parent / "fixtures" / "campaigns"
|
||||
FIXTURE_YAML = FIXTURE_DIR / "noise_floor.yaml"
|
||||
EXPECTED_YAML = FIXTURE_DIR / "noise_floor.expected.yaml"
|
||||
|
||||
|
||||
def _expand_noise_floor_spec() -> dict[str, Any]:
    """Expand noise_floor.yaml's ``include_fixtures`` block into a corpus spec.

    Each listed fixture file is loaded from ``FIXTURE_DIR``. A fixture
    with a top-level ``corpus`` key contributes all of its campaigns
    plus its ``noise.scanner_count``; any other fixture contributes its
    single ``campaign`` entry. ``extra_noise_scanners`` from
    noise_floor.yaml is added on top of the inherited scanner count.

    Keys that are present but null in the YAML (``campaigns:``,
    ``noise:``, ``scanner_count:``, ``extra_noise_scanners:``) are
    treated exactly like absent keys instead of raising ``TypeError``.

    Returns:
        A ``{"corpus": {"campaigns": [...], "noise": {"scanner_count": N}}}``
        dict meant to be handed to the factory's ``generate()`` as-is.

    Raises:
        KeyError: if noise_floor.yaml lacks ``include_fixtures`` or a
            non-corpus constituent lacks ``campaign``.
    """
    declared = yaml.safe_load(FIXTURE_YAML.read_text(encoding="utf-8"))
    campaigns: list[dict[str, Any]] = []
    inherited_noise = 0
    for fname in declared["include_fixtures"]:
        sub = load_yaml(FIXTURE_DIR / fname)
        if "corpus" in sub:
            corpus_spec = sub["corpus"]
            # `or` guards cover keys that exist but hold null.
            campaigns.extend(corpus_spec.get("campaigns") or [])
            noise_spec = corpus_spec.get("noise") or {}
            inherited_noise += int(noise_spec.get("scanner_count") or 0)
        else:
            # Single-campaign fixture: wrap it in the corpus list shape.
            campaigns.append({"campaign": sub["campaign"]})
    extra = int(declared.get("extra_noise_scanners") or 0)
    return {
        "corpus": {
            "campaigns": campaigns,
            "noise": {"scanner_count": inherited_noise + extra},
        }
    }
|
||||
|
||||
|
||||
def test_noise_floor_corpus_integrity() -> None:
    """The merged corpus keeps every constituent campaign and all noise.

    Verifies that each constituent fixture's campaign id is present,
    that the 18 noise scanners (8 inherited from lone_wolf + 10 added)
    each carry a distinct truth campaign id, and that exactly 12
    campaign-driven attackers remain.
    """
    corpus = generate(_expand_noise_floor_spec(), seed=0)

    truth_campaigns = {att.truth_campaign_id for att in corpus.attackers}

    # A missing id here means the loader dropped a constituent fixture.
    expected_campaign_ids = {
        "shared-wordlist-A",
        "shared-wordlist-B",
        "vpn-hopping-001",
        "lone-wolf-001",
        "paused-campaign-001",
        "multi-operator-001",
    }
    missing = expected_campaign_ids - truth_campaigns
    assert not missing, f"missing campaign ids: {missing}"

    # Split the corpus into noise scanners and campaign-driven attackers.
    noise_attackers = []
    real_attackers = []
    for att in corpus.attackers:
        if att.truth_campaign_id.startswith("noise-scanner-"):
            noise_attackers.append(att)
        else:
            real_attackers.append(att)

    # 8 inherited from lone_wolf + 10 added.
    assert len(noise_attackers) == 18

    # Each noise scanner is its own truth campaign (singleton).
    assert len({att.truth_campaign_id for att in noise_attackers}) == 18

    # 2 (shared_wordlist) + 5 (vpn_hopping) + 1 (lone_wolf wolf)
    # + 2 (paused_campaign) + 2 (multi_operator) = 12.
    assert len(real_attackers) == 12, (
        f"expected 12 campaign-driven attackers, got {len(real_attackers)}"
    )
|
||||
|
||||
|
||||
def test_noise_floor_pipeline_passes_bounds() -> None:
    """The composite-signals clusterer clears the merged corpus's bounds.

    The combined corpus is heterogeneous, so a perfect ARI is not
    demanded; the test only re-checks that ARI and singleton recall
    are at or above 0.85 and 0.95 respectively.
    """
    corpus = generate(_expand_noise_floor_spec(), seed=0)
    metrics = assert_fixture_bounds(corpus, composite_signals_clusterer, EXPECTED_YAML)

    ari = metrics["adjusted_rand_index"]
    assert ari >= 0.85
    singleton_recall = metrics["singleton_recall"]
    assert singleton_recall >= 0.95
|
||||
|
||||
|
||||
def test_noise_floor_singleton_recall_holds() -> None:
    """Every truth-singleton must stay singleton under the composite clusterer.

    The truth-singletons in this composite are the lone wolf, the 18
    noise scanners, and the two shared_wordlist actors (each of those
    campaigns has exactly one actor) — 21 in total. Noise absorption
    is the failure mode that makes campaign attribution useless in
    practice.
    """
    from collections import Counter

    corpus = generate(_expand_noise_floor_spec(), seed=0)
    pred = composite_signals_clusterer(corpus)
    truth = corpus.truth_labels(level="campaign")

    # Cluster sizes on each side: a label seen once is a singleton.
    truth_counts = Counter(truth.values())
    pred_counts = Counter(pred.values())

    true_singletons = [aid for aid, label in truth.items() if truth_counts[label] == 1]
    # 1 lone wolf + 18 noise + 2 shared_wordlist actors = 21.
    assert len(true_singletons) == 21, (
        f"expected 21 truth-singletons, got {len(true_singletons)}"
    )

    # A truth-singleton whose predicted cluster has other members was absorbed.
    absorbed = [aid for aid in true_singletons if pred_counts[pred[aid]] != 1]
    assert not absorbed, (
        f"composite clusterer absorbed {len(absorbed)} singletons into "
        f"larger clusters: {absorbed[:5]}…"
    )

    metrics = score(truth, pred)
    assert metrics["singleton_recall"] == pytest.approx(1.0)
|
||||
140
tests/clustering/test_paused_campaign_fixture.py
Normal file
140
tests/clustering/test_paused_campaign_fixture.py
Normal file
@@ -0,0 +1,140 @@
|
||||
"""
|
||||
End-to-end pipeline test for fixture 4 (paused_campaign).
|
||||
|
||||
One campaign, two operational windows separated by a multi-day
|
||||
silent stretch (days 3-5, 0-indexed [2, 4]). Modeled as two DSL
|
||||
actors sharing JA3 + HASSH + payload + C2 callback — the
|
||||
fingerprint-stable signals a real clusterer should resolve on.
|
||||
Their ``active_days`` differ so each row's sessions land in
|
||||
disjoint time ranges; this is what gives the adversarial
|
||||
``time_window_clusterer`` something to fragment.
|
||||
|
||||
Three tests cover this:
|
||||
|
||||
1. `test_paused_campaign_corpus_shape` — sanity: 2 attackers, both
|
||||
share campaign id, sessions are time-disjoint across the pause
|
||||
window.
|
||||
|
||||
2. `test_paused_campaign_pipeline_passes_bounds` —
|
||||
`fingerprint_clusterer` reference folds both rows into one
|
||||
cluster (shared JA3 + HASSH). Trivially green at campaign-level
|
||||
scoring; the test is a ratchet point for the real algorithm.
|
||||
|
||||
3. `test_time_window_clusterer_fragments_campaign` — runs the
|
||||
deliberately-bad `time_window_clusterer`. With a 4-day silent
|
||||
stretch and a 1-day union threshold, the two halves cannot be
|
||||
bridged → 2 clusters → completeness collapses → bound rejected.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from tests.clustering.fixture_harness import (
|
||||
assert_fixture_bounds,
|
||||
fingerprint_clusterer,
|
||||
time_window_clusterer,
|
||||
)
|
||||
from tests.clustering.metrics import score
|
||||
from tests.factories.campaign_factory import generate, load_yaml
|
||||
|
||||
FIXTURE_DIR = Path(__file__).parent.parent / "fixtures" / "campaigns"
|
||||
FIXTURE_YAML = FIXTURE_DIR / "paused_campaign.yaml"
|
||||
EXPECTED_YAML = FIXTURE_DIR / "paused_campaign.expected.yaml"
|
||||
|
||||
|
||||
def test_paused_campaign_corpus_shape() -> None:
    """Sanity-check the generated corpus before any clustering runs.

    Expects two attacker rows in one truth campaign, a single shared
    JA3 and a single shared HASSH, and each actor's sessions confined
    to its own calendar days.
    """
    corpus = generate(load_yaml(FIXTURE_YAML), seed=0)
    assert len(corpus.attackers) == 2
    assert {att.truth_campaign_id for att in corpus.attackers} == {"paused-campaign-001"}

    # One shared JA3 and one shared HASSH are the load-bearing signal
    # that lets fingerprint_clusterer fold the two rows.
    distinct_ja3 = {att.ja3 for att in corpus.attackers}
    distinct_hassh = {att.hassh for att in corpus.attackers}
    assert len(distinct_ja3) == 1
    assert len(distinct_hassh) == 1

    # Each row's session timeline lives in its actor's active_days.
    rows_by_actor = {att.truth_actor_id: att for att in corpus.attackers}
    first_sprint = rows_by_actor["ops-sprint-1"]
    second_sprint = rows_by_actor["ops-sprint-2"]
    sprint_1_days = {sess.started_at.day for sess in first_sprint.sessions}
    sprint_2_days = {sess.started_at.day for sess in second_sprint.sessions}

    # Epoch is 2026-01-01; active_days [0,1] → calendar days 1,2;
    # active_days [5,6] → calendar days 6,7.
    assert sprint_1_days <= {1, 2}, f"sprint-1 leaked outside its window: {sprint_1_days}"
    assert sprint_2_days <= {6, 7}, f"sprint-2 leaked outside its window: {sprint_2_days}"
|
||||
|
||||
|
||||
def test_paused_campaign_pipeline_passes_bounds() -> None:
    """The fingerprint reference clusterer clears this fixture's bounds.

    Both rows share fingerprints, so one predicted cluster is expected;
    one truth campaign of 2 rows against one predicted cluster of 2
    rows gives an ARI of 1.0.
    """
    corpus = generate(load_yaml(FIXTURE_YAML), seed=0)
    metrics = assert_fixture_bounds(corpus, fingerprint_clusterer, EXPECTED_YAML)

    cluster_ids = set(fingerprint_clusterer(corpus).values())
    assert len(cluster_ids) == 1
    assert metrics["adjusted_rand_index"] == pytest.approx(1.0)
|
||||
|
||||
|
||||
def test_time_window_clusterer_fragments_campaign() -> None:
    """
    Adversarial check — the reason this fixture exists.

    With a multi-day silence between the two operational windows and a
    1-day union threshold, the bad clusterer cannot bridge the gap, so
    the campaign splits in two and completeness collapses.

    If `time_window_clusterer` ever satisfies the bounds below (i.e.
    this test starts failing), the fixture has lost its discrimination
    power.
    """
    corpus = generate(load_yaml(FIXTURE_YAML), seed=0)
    pred = time_window_clusterer(corpus, gap_days=1.0)
    assert len(set(pred.values())) == 2, (
        f"time-window clusterer should split into 2 clusters, got {len(set(pred.values()))}"
    )

    truth = corpus.truth_labels(level="campaign")
    metrics = score(truth, pred)
    assert metrics["completeness"] == pytest.approx(0.0)

    # Metric floors; the bad clusterer has to miss at least the
    # completeness floor.
    floors = {
        "adjusted_rand_index": 0.85,
        "homogeneity": 0.90,
        "completeness": 0.80,
        "singleton_recall": 0.95,
    }
    breaches = [name for name, floor in floors.items() if metrics[name] < floor]
    assert "completeness" in breaches, (
        f"fixture failed to catch the bad clusterer; observed metrics: {metrics}"
    )
|
||||
|
||||
|
||||
def test_time_window_clusterer_with_huge_gap_does_not_fragment() -> None:
    """
    Sanity check for the time-window reference clusterer.

    With a threshold (gap_days=10) larger than the campaign's silent
    stretch, the two halves must union into one cluster. This confirms
    the clusterer's output is driven by the threshold rather than by
    something unrelated. (Pause is days 3-5, so the separation between
    session ranges is roughly 4 days.)
    """
    corpus = generate(load_yaml(FIXTURE_YAML), seed=0)
    cluster_ids = set(time_window_clusterer(corpus, gap_days=10.0).values())
    assert len(cluster_ids) == 1
|
||||
|
||||
|
||||
def test_silent_stretch_actually_silent() -> None:
    """No session's start day may fall inside the configured pause window."""
    corpus = generate(load_yaml(FIXTURE_YAML), seed=0)

    # Calendar (1-indexed) days; pause_windows [[2,4]] in 0-indexed terms.
    pause_calendar_days = {3, 4, 5}

    leaked = []
    for s in corpus.sessions:
        if s.started_at.day in pause_calendar_days:
            leaked.append(s)

    assert not leaked, (
        f"sessions leaked into the silent stretch: "
        f"{[(s.session_id, s.started_at) for s in leaked]}"
    )
|
||||
117
tests/clustering/test_shared_wordlist_fixture.py
Normal file
117
tests/clustering/test_shared_wordlist_fixture.py
Normal file
@@ -0,0 +1,117 @@
|
||||
"""
|
||||
End-to-end pipeline test for fixture 1 (shared_wordlist).
|
||||
|
||||
Two campaigns. Same SSH credential wordlist. Everything else divergent
|
||||
— ASN, IPs, JA3, HASSH, active hours.
|
||||
|
||||
The fixture exists to defeat one specific failure mode: a clusterer
|
||||
that leans on credential-list overlap as a primary signal. Commodity
|
||||
wordlists (rockyou, defaults lists, top-1k common-credentials) are
|
||||
shared by hundreds of unrelated actors — credential overlap alone
|
||||
cannot identify a campaign.
|
||||
|
||||
Two tests cover this:
|
||||
|
||||
1. `test_shared_wordlist_pipeline_passes_bounds` — runs the placeholder
|
||||
identity clusterer against the fixture. Trivially green (each
|
||||
campaign has one actor → identity puts each in its own cluster).
|
||||
This is the ratchet point: when the real algorithm replaces the
|
||||
placeholder, this test must continue to pass.
|
||||
|
||||
2. `test_credential_jaccard_clusterer_fails_homogeneity` — runs a
|
||||
deliberately-bad clusterer that merges any two attackers whose
|
||||
credential sets overlap above 50% Jaccard. Proves the fixture
|
||||
actually catches what it's designed to catch: this clusterer DOES
|
||||
merge the two campaigns, and the fixture's homogeneity floor (0.90)
|
||||
is breached. If the bad clusterer ever clears that floor, our fixture or our metric
|
||||
harness is broken.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from tests.clustering.fixture_harness import (
|
||||
assert_fixture_bounds,
|
||||
credential_jaccard_clusterer,
|
||||
identity_clusterer,
|
||||
)
|
||||
from tests.clustering.metrics import score
|
||||
from tests.factories.campaign_factory import generate, load_yaml
|
||||
|
||||
FIXTURE_DIR = Path(__file__).parent.parent / "fixtures" / "campaigns"
|
||||
|
||||
|
||||
def test_shared_wordlist_pipeline_passes_bounds() -> None:
    """The identity clusterer clears the shared_wordlist bound file."""
    fixture_path = FIXTURE_DIR / "shared_wordlist.yaml"
    expected_path = FIXTURE_DIR / "shared_wordlist.expected.yaml"
    corpus = generate(load_yaml(fixture_path), seed=0)
    assert_fixture_bounds(corpus, identity_clusterer, expected_path)
|
||||
|
||||
|
||||
def test_shared_wordlist_corpus_shape() -> None:
    """Sanity: two attackers, one per campaign, each trying the shared list.

    Every attacker must have at least one session with credentials, and
    every such session must carry exactly 8 credential entries.
    """
    corpus = generate(load_yaml(FIXTURE_DIR / "shared_wordlist.yaml"), seed=0)
    assert len(corpus.attackers) == 2

    campaign_labels = set(corpus.truth_labels().values())
    assert campaign_labels == {"shared-wordlist-A", "shared-wordlist-B"}

    for att in corpus.attackers:
        # Sessions that attempted any credentials at all.
        cred_sessions = [sess for sess in att.sessions if sess.credentials_tried]
        assert cred_sessions, f"attacker {att.attacker_id} has no credential sessions"
        # Every credential session carries the same 8-entry wordlist.
        wordlist_sizes = {len(sess.credentials_tried) for sess in cred_sessions}
        assert wordlist_sizes == {8}
|
||||
|
||||
|
||||
def test_credential_jaccard_clusterer_fails_homogeneity() -> None:
    """
    Adversarial check — the reason this fixture exists.

    A naive clusterer that merges on credential-set Jaccard ≥ 0.5 fuses
    the two campaigns (Jaccard = 1.0 on shared wordlists), which drives
    homogeneity to 0 — exactly the failure mode the fixture protects
    against.

    If the bad clusterer ever scores high on this fixture (i.e. this
    test starts failing), the fixture has lost its discrimination power
    and needs to be re-examined.
    """
    corpus = generate(load_yaml(FIXTURE_DIR / "shared_wordlist.yaml"), seed=0)
    pred = credential_jaccard_clusterer(corpus, threshold=0.5)
    metrics = score(corpus.truth_labels(), pred)

    # This clusterer must merge the two campaigns.
    merged_clusters = set(pred.values())
    assert len(merged_clusters) == 1, (
        "credential-Jaccard clusterer should merge both campaigns into one"
    )
    # Collapsed homogeneity is the signal a fixture-aware CI gate would
    # use to reject the bad clusterer.
    assert metrics["homogeneity"] == pytest.approx(0.0)
|
||||
|
||||
|
||||
def test_naive_clusterer_does_not_fool_the_fixture() -> None:
    """
    Belt-and-braces companion to the homogeneity-collapse test.

    The bad clusterer may still pass *some* metrics (completeness is
    1.0, since every member of each true campaign lands in the single
    mega-cluster), so the homogeneity floor (0.90) is the one that
    must reject it.
    """
    corpus = generate(load_yaml(FIXTURE_DIR / "shared_wordlist.yaml"), seed=0)
    pred = credential_jaccard_clusterer(corpus, threshold=0.5)
    metrics = score(corpus.truth_labels(), pred)

    floors = {
        "adjusted_rand_index": 0.85,
        "homogeneity": 0.90,
        "completeness": 0.80,
        "singleton_recall": 0.95,
    }
    breaches = [name for name, floor in floors.items() if metrics[name] < floor]
    assert "homogeneity" in breaches, (
        f"fixture failed to catch the bad clusterer; observed metrics: {metrics}"
    )
|
||||
348
tests/clustering/test_similarity.py
Normal file
348
tests/clustering/test_similarity.py
Normal file
@@ -0,0 +1,348 @@
|
||||
"""Unit tests for the similarity-graph primitives.
|
||||
|
||||
Each edge function is tested in isolation: agreement → high score,
|
||||
disagreement → zero, missing-data → zero. Combination logic +
|
||||
thresholds live in the connected-components impl and are covered by
|
||||
the fixture suite once those land.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.clustering.impl.similarity import (
|
||||
EDGE_THRESHOLD,
|
||||
Observation,
|
||||
combined_edge_weight,
|
||||
from_synthetic,
|
||||
high_weight_edge,
|
||||
low_weight_edge,
|
||||
medium_weight_edge,
|
||||
very_low_weight_edge,
|
||||
)
|
||||
|
||||
|
||||
def _obs(**kwargs) -> Observation:
    """Construct an Observation, defaulting ``observation_id`` to "obs-x"."""
    fields = {"observation_id": "obs-x", **kwargs}
    return Observation(**fields)
|
||||
|
||||
|
||||
# ─── high_weight_edge ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_high_weight_ja3_match():
    """Identical JA3 on both observations scores 1.0."""
    left = _obs(ja3="ja3-stable")
    right = _obs(ja3="ja3-stable")
    assert high_weight_edge(left, right) == 1.0
|
||||
|
||||
|
||||
def test_high_weight_hassh_match():
    """Identical HASSH on both observations scores 1.0."""
    left = _obs(hassh="hassh-stable")
    right = _obs(hassh="hassh-stable")
    assert high_weight_edge(left, right) == 1.0
|
||||
|
||||
|
||||
def test_high_weight_payload_hash_overlap():
    """Payload-hash sets sharing one element ("pl-2") score 1.0."""
    left = _obs(payload_hashes=frozenset({"pl-1", "pl-2"}))
    right = _obs(payload_hashes=frozenset({"pl-2", "pl-3"}))
    assert high_weight_edge(left, right) == 1.0
|
||||
|
||||
|
||||
def test_high_weight_c2_overlap():
    """One shared C2 endpoint is enough to score 1.0."""
    left = _obs(c2_endpoints=frozenset({"c2.example.com"}))
    right = _obs(c2_endpoints=frozenset({"c2.example.com", "c2-alt.example.com"}))
    assert high_weight_edge(left, right) == 1.0
|
||||
|
||||
|
||||
def test_high_weight_no_match():
    """Differing JA3, HASSH and payload hashes score 0.0."""
    left = _obs(ja3="ja3-a", hassh="hassh-a", payload_hashes=frozenset({"x"}))
    right = _obs(ja3="ja3-b", hassh="hassh-b", payload_hashes=frozenset({"y"}))
    assert high_weight_edge(left, right) == 0.0
|
||||
|
||||
|
||||
def test_high_weight_both_null_ja3_does_not_match():
    """Null fingerprints on both sides must not count as agreement.

    Treating both-null as a match would fuse every un-fingerprinted
    noise scanner into one mega-cluster.
    """
    left = _obs(ja3=None, hassh=None)
    right = _obs(ja3=None, hassh=None)
    assert high_weight_edge(left, right) == 0.0
|
||||
|
||||
|
||||
# ─── fingerprint-disagreement veto on payload / C2 ──────────────────────────
|
||||
|
||||
|
||||
def test_high_weight_veto_on_fingerprint_disagreement_with_shared_c2():
    """Fixture 5 protection: shared C2 is vetoed by fingerprint disagreement.

    Two operators with distinct JA3 + HASSH that share a C2 endpoint
    must NOT score as an identity match.
    """
    shared_c2 = frozenset({"c2.shared.example"})
    left = _obs(ja3="ja3-A", hassh="hassh-A", c2_endpoints=shared_c2)
    right = _obs(ja3="ja3-B", hassh="hassh-B", c2_endpoints=shared_c2)
    assert high_weight_edge(left, right) == 0.0
|
||||
|
||||
|
||||
def test_high_weight_veto_on_fingerprint_disagreement_with_shared_payload():
    """A shared payload hash is likewise vetoed by fingerprint disagreement."""
    shared_payload = frozenset({"stage1"})
    left = _obs(ja3="ja3-A", hassh="hassh-A", payload_hashes=shared_payload)
    right = _obs(ja3="ja3-B", hassh="hassh-B", payload_hashes=shared_payload)
    assert high_weight_edge(left, right) == 0.0
|
||||
|
||||
|
||||
def test_high_weight_no_veto_when_fingerprints_unknown():
    """Un-fingerprinted observations sharing a C2 endpoint still score 1.0.

    There is no veto without evidence of disagreement.
    """
    left = _obs(c2_endpoints=frozenset({"c2.shared.example"}))
    right = _obs(c2_endpoints=frozenset({"c2.shared.example"}))
    assert high_weight_edge(left, right) == 1.0
|
||||
|
||||
|
||||
def test_high_weight_no_veto_when_one_side_unknown():
    """Fingerprints on only one side give no disagreement evidence.

    The shared C2 endpoint therefore still scores 1.0.
    """
    fingerprinted = _obs(
        ja3="ja3-A",
        hassh="hassh-A",
        c2_endpoints=frozenset({"c2.shared.example"}),
    )
    unfingerprinted = _obs(c2_endpoints=frozenset({"c2.shared.example"}))
    assert high_weight_edge(fingerprinted, unfingerprinted) == 1.0
|
||||
|
||||
|
||||
def test_high_weight_partial_fingerprint_agreement_no_veto():
    """JA3 agrees while HASSH disagrees: some agreement, so no veto.

    The veto only triggers on FULL disagreement.
    """
    shared_c2 = frozenset({"c2.shared.example"})
    left = _obs(ja3="ja3-shared", hassh="hassh-A", c2_endpoints=shared_c2)
    right = _obs(ja3="ja3-shared", hassh="hassh-B", c2_endpoints=shared_c2)
    # Per the original author's note, the JA3 agreement returns 1.0
    # before the veto is ever reached.
    assert high_weight_edge(left, right) == 1.0
|
||||
|
||||
|
||||
def test_high_weight_partial_disagreement_one_slot_only_vetoes():
    """A lone comparable slot that disagrees is enough to veto.

    JA3 is set on both sides and differs; HASSH is null on both sides
    and so cannot be compared. The only available fingerprint evidence
    is disagreement, so the shared C2 endpoint scores 0.0.
    """
    shared_c2 = frozenset({"c2.shared.example"})
    left = _obs(ja3="ja3-A", hassh=None, c2_endpoints=shared_c2)
    right = _obs(ja3="ja3-B", hassh=None, c2_endpoints=shared_c2)
    assert high_weight_edge(left, right) == 0.0
|
||||
|
||||
|
||||
def test_high_weight_empty_sets_no_match():
    """Empty payload-hash and C2 sets on both sides score 0.0."""
    left = _obs(payload_hashes=frozenset(), c2_endpoints=frozenset())
    right = _obs(payload_hashes=frozenset(), c2_endpoints=frozenset())
    assert high_weight_edge(left, right) == 0.0
|
||||
|
||||
|
||||
# ─── medium_weight_edge ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_medium_weight_jaccard_full_match_in_one_phase():
    """Identical command tuples in the same phase score 1.0."""
    left = _obs(commands_by_phase={"discovery": ("ls", "id", "uname -a")})
    right = _obs(commands_by_phase={"discovery": ("ls", "id", "uname -a")})
    assert medium_weight_edge(left, right) == pytest.approx(1.0)
|
||||
|
||||
|
||||
def test_medium_weight_jaccard_partial_match():
    """Two shared commands out of four distinct ones score 0.5."""
    larger = _obs(commands_by_phase={"discovery": ("ls", "id", "uname -a", "whoami")})
    smaller = _obs(commands_by_phase={"discovery": ("ls", "id")})
    # |A∩B| = 2, |A∪B| = 4 → 0.5
    assert medium_weight_edge(larger, smaller) == pytest.approx(0.5)
|
||||
|
||||
|
||||
def test_medium_weight_picks_max_across_phases():
    """With several shared phases, the best-matching phase sets the score."""
    left = _obs(
        commands_by_phase={
            "discovery": ("ls",),
            "exploitation": ("./payload", "chmod +x payload"),
        }
    )
    right = _obs(
        commands_by_phase={
            "discovery": ("ps",),  # disjoint from left → 0.0
            "exploitation": ("./payload", "chmod +x payload"),  # identical → 1.0
        }
    )
    assert medium_weight_edge(left, right) == pytest.approx(1.0)
|
||||
|
||||
|
||||
def test_medium_weight_no_shared_phase_returns_zero():
    """Observations with no phase in common score 0.0."""
    discovery_only = _obs(commands_by_phase={"discovery": ("ls",)})
    exploitation_only = _obs(commands_by_phase={"exploitation": ("./payload",)})
    assert medium_weight_edge(discovery_only, exploitation_only) == 0.0
|
||||
|
||||
|
||||
def test_medium_weight_disjoint_commands_in_shared_phase():
    """A shared phase with no shared commands scores 0.0."""
    left = _obs(commands_by_phase={"discovery": ("ls",)})
    right = _obs(commands_by_phase={"discovery": ("ps",)})
    # |A∩B| = 0, |A∪B| = 2
    assert medium_weight_edge(left, right) == 0.0
|
||||
|
||||
|
||||
def test_medium_weight_empty_corpora_returns_zero():
    """Default observations with no commands score 0.0."""
    left = _obs()
    right = _obs()
    assert medium_weight_edge(left, right) == 0.0
|
||||
|
||||
|
||||
# ─── low_weight_edge ───────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_low_weight_credential_jaccard_match():
    """Identical credential sets score 1.0."""
    left = _obs(credentials=frozenset({("root", "toor"), ("admin", "admin")}))
    right = _obs(credentials=frozenset({("root", "toor"), ("admin", "admin")}))
    assert low_weight_edge(left, right) == pytest.approx(1.0)
|
||||
|
||||
|
||||
def test_low_weight_credential_partial_overlap():
    """One shared credential out of three distinct ones scores 1/3."""
    left = _obs(credentials=frozenset({("root", "toor"), ("admin", "admin")}))
    right = _obs(credentials=frozenset({("root", "toor"), ("user", "user")}))
    # |A∩B| = 1, |A∪B| = 3
    assert low_weight_edge(left, right) == pytest.approx(1 / 3)
|
||||
|
||||
|
||||
def test_low_weight_no_credentials_returns_zero():
    """A side with no credentials at all yields 0.0."""
    without_creds = _obs()
    with_creds = _obs(credentials=frozenset({("root", "toor")}))
    assert low_weight_edge(without_creds, with_creds) == 0.0
|
||||
|
||||
|
||||
# ─── very_low_weight_edge ──────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_very_low_weight_asn_match():
    """Equal ASNs score 1.0."""
    left = _obs(asn=64500)
    right = _obs(asn=64500)
    assert very_low_weight_edge(left, right) == 1.0
|
||||
|
||||
|
||||
def test_very_low_weight_asn_mismatch():
    """Different ASNs score 0.0."""
    left = _obs(asn=64500)
    right = _obs(asn=64501)
    assert very_low_weight_edge(left, right) == 0.0
|
||||
|
||||
|
||||
def test_very_low_weight_asn_null_returns_zero():
    """A null ASN on one side scores 0.0."""
    unknown_asn = _obs(asn=None)
    known_asn = _obs(asn=64500)
    assert very_low_weight_edge(unknown_asn, known_asn) == 0.0
|
||||
|
||||
|
||||
# ─── time-agnostic invariant ───────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_observations_carry_no_timestamps():
    """Structural guard: Observation declares none of the known time fields.

    Without such fields no edge function can accidentally start using
    them. Fixture 7 forbids recency-decay clustering.
    """
    forbidden = {"first_seen", "last_seen", "started_at", "session_midpoint", "timestamp"}
    field_names = set(Observation.__dataclass_fields__)
    assert field_names.isdisjoint(forbidden), (
        f"Observation grew time fields: {field_names & forbidden}. "
        "Fixture 7 (slow_burn) forbids recency-aware clustering."
    )
|
||||
|
||||
|
||||
# ─── from_synthetic adapter (round-trip test is at the end of this file) ───
|
||||
|
||||
|
||||
# ─── combined_edge_weight tier discipline ─────────────────────────────────
|
||||
|
||||
|
||||
def test_combined_high_alone_crosses_threshold():
    """A JA3 match alone reaches EDGE_THRESHOLD."""
    left = _obs(ja3="ja3-shared")
    right = _obs(ja3="ja3-shared")
    assert combined_edge_weight(left, right) >= EDGE_THRESHOLD
|
||||
|
||||
|
||||
def test_combined_medium_alone_below_threshold():
    """A single medium-tier match must NOT cluster.

    Medium is a supporting signal, never a clustering driver on its
    own: the weight is positive but stays under EDGE_THRESHOLD.
    """
    left = _obs(commands_by_phase={"discovery": ("ls", "id", "uname")})
    right = _obs(commands_by_phase={"discovery": ("ls", "id", "uname")})
    combined = combined_edge_weight(left, right)
    assert 0 < combined < EDGE_THRESHOLD
|
||||
|
||||
|
||||
def test_combined_low_alone_below_threshold():
    """Credential-only overlap must NOT cluster — fixture 1's failure mode."""
    left = _obs(credentials=frozenset({("root", "toor"), ("admin", "admin")}))
    right = _obs(credentials=frozenset({("root", "toor"), ("admin", "admin")}))
    combined = combined_edge_weight(left, right)
    assert 0 < combined < EDGE_THRESHOLD
|
||||
|
||||
|
||||
def test_combined_very_low_alone_below_threshold():
    """ASN-only overlap must NOT cluster — fixture 2's failure mode."""
    left = _obs(asn=64500)
    right = _obs(asn=64500)
    combined = combined_edge_weight(left, right)
    assert 0 < combined < EDGE_THRESHOLD
|
||||
|
||||
|
||||
def test_combined_all_weak_tiers_still_below_threshold():
    """All three weaker tiers stacked still fall short of EDGE_THRESHOLD.

    Only a high-tier signal is meant to reach the threshold.
    """

    def _weak_signals_only() -> Observation:
        # Matching ASN, credentials and commands, but no high-tier signal.
        return _obs(
            asn=64500,
            credentials=frozenset({("root", "toor")}),
            commands_by_phase={"discovery": ("ls",)},
        )

    left = _weak_signals_only()
    right = _weak_signals_only()
    # Original author's arithmetic: 0.6*1.0 (medium) + 0.2*1.0 (low)
    # + 0.05*1.0 (very_low) = 0.85. The tier weights live in the
    # similarity module and are not visible from this test.
    combined = combined_edge_weight(left, right)
    assert combined < EDGE_THRESHOLD
|
||||
|
||||
|
||||
def test_combined_high_plus_medium_clusters():
    """A shared JA3 plus matching discovery commands must reach the edge threshold."""
    shared = {"ja3": "ja3-x", "commands_by_phase": {"discovery": ("ls",)}}
    left = _obs(ja3=shared["ja3"], commands_by_phase={"discovery": ("ls",)})
    right = _obs(ja3=shared["ja3"], commands_by_phase={"discovery": ("ls",)})
    edge_weight = combined_edge_weight(left, right)
    assert edge_weight >= EDGE_THRESHOLD
|
||||
|
||||
|
||||
def test_combined_no_signal_returns_zero():
    """Two default observations built with no signals must score exactly 0.0."""
    left, right = _obs(), _obs()
    edge_weight = combined_edge_weight(left, right)
    assert edge_weight == 0.0
|
||||
|
||||
|
||||
def test_from_synthetic_round_trip():
    """Check that ``from_synthetic`` projects a SyntheticAttacker into an
    observation whose fields the edge functions can score over.

    One attacker with a single discovery-phase session is built, then
    every projected field is compared against the values fed in.
    """
    from datetime import datetime, timezone
    from tests.factories.campaign_factory import (
        SyntheticAttacker, SyntheticSession,
    )
    from decnet.clustering.ukc import UKCPhase

    stamp = datetime.now(timezone.utc)

    # The lone session carries every per-session signal the adapter
    # is expected to lift: commands, credentials, payload hash, C2.
    session = SyntheticSession(
        session_id="s1",
        attacker_id="a1",
        decky_id="d1",
        started_at=stamp,
        duration_s=10.0,
        phase=UKCPhase.DISCOVERY,
        commands=["ls", "id"],
        credentials_tried=[("root", "toor")],
        payload_hash="pl-1",
        c2_callback="c2.example.com",
        truth_campaign_id="c1",
        truth_actor_id="actor-1",
    )
    attacker = SyntheticAttacker(
        attacker_id="a1",
        ip="1.1.1.1",
        asn=64500,
        ja3="ja3-x",
        hassh="hassh-y",
        first_seen=stamp,
        last_seen=stamp,
        truth_campaign_id="c1",
        truth_actor_id="actor-1",
        sessions=[session],
    )

    projected = from_synthetic(attacker)

    # Attacker-level fields pass straight through.
    assert projected.observation_id == "a1"
    assert projected.ja3 == "ja3-x"
    assert projected.hassh == "hassh-y"
    assert projected.asn == 64500
    # Session-level fields are aggregated into frozensets / a phase map.
    assert projected.payload_hashes == frozenset({"pl-1"})
    assert projected.c2_endpoints == frozenset({"c2.example.com"})
    assert projected.credentials == frozenset({("root", "toor")})
    assert projected.commands_by_phase == {"discovery": ("ls", "id")}
|
||||
128
tests/clustering/test_slow_burn_fixture.py
Normal file
128
tests/clustering/test_slow_burn_fixture.py
Normal file
@@ -0,0 +1,128 @@
|
||||
"""
|
||||
End-to-end pipeline test for fixture 7 (slow_burn).
|
||||
|
||||
90-day APT campaign with three operational windows separated by
|
||||
multi-week silences. Models the real operational tempo of an APT
|
||||
working a deep nested topology (MazeNET-style): recon over weeks,
|
||||
exploitation later, action-on-objectives later still. The unique
|
||||
signal this fixture stresses is TIME-AGNOSTIC IDENTITY — a
|
||||
clusterer that silently expires old edges fragments any campaign
|
||||
that operates over months.
|
||||
|
||||
Four tests cover this (the three listed below, plus a long-half-life sanity check of the recency-decay reference):
|
||||
|
||||
1. `test_slow_burn_corpus_shape` — sanity: 3 attackers, all share
|
||||
campaign id and operator fingerprint, sessions land in their
|
||||
respective operational windows.
|
||||
|
||||
2. `test_slow_burn_pipeline_passes_bounds` —
|
||||
`composite_signals_clusterer` (fingerprint OR C2 — time-agnostic)
|
||||
folds all three windows into one cluster.
|
||||
|
||||
3. `test_recency_decay_clusterer_fragments_campaign` — runs the
|
||||
deliberately-bad `recency_decay_clusterer` with a 14-day half-
|
||||
life and a 0.5 weight threshold. Edges between adjacent
|
||||
operational windows (24+ days apart) decay below threshold and
|
||||
drop. The campaign splits into three clusters; completeness
|
||||
collapses; the bound floor rejects the bad clusterer.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from tests.clustering.fixture_harness import (
|
||||
assert_fixture_bounds,
|
||||
composite_signals_clusterer,
|
||||
recency_decay_clusterer,
|
||||
)
|
||||
from tests.clustering.metrics import score
|
||||
from tests.factories.campaign_factory import generate, load_yaml
|
||||
|
||||
# Campaign fixtures live under tests/fixtures/campaigns/, two levels up
# from this file; the spec and its paired bounds file sit side by side.
FIXTURE_DIR = Path(__file__).parent.parent.joinpath("fixtures", "campaigns")
FIXTURE_YAML = FIXTURE_DIR.joinpath("slow_burn.yaml")
EXPECTED_YAML = FIXTURE_DIR.joinpath("slow_burn.expected.yaml")
|
||||
|
||||
|
||||
def test_slow_burn_corpus_shape() -> None:
    """Sanity-check the generated slow_burn corpus.

    Expects three attacker rows in one campaign with a single shared
    JA3 and HASSH, and each row's sessions confined to its own
    operational window (expressed as day-of-year).
    """
    corpus = generate(load_yaml(FIXTURE_YAML), seed=0)
    assert len(corpus.attackers) == 3
    campaign_ids = {att.truth_campaign_id for att in corpus.attackers}
    assert campaign_ids == {"slow-burn-001"}

    # The operator fingerprint must not change between windows.
    distinct_ja3 = {att.ja3 for att in corpus.attackers}
    distinct_hassh = {att.hassh for att in corpus.attackers}
    assert len(distinct_ja3) == 1
    assert len(distinct_hassh) == 1

    attacker_for = {att.truth_actor_id: att for att in corpus.attackers}

    def session_days(actor_id):
        # Day-of-year of every session start for the given actor row.
        return {
            sess.started_at.timetuple().tm_yday
            for sess in attacker_for[actor_id].sessions
        }

    recon_days = session_days("ops-recon")
    exploit_days = session_days("ops-exploit")
    action_days = session_days("ops-action")

    # Epoch is 2026-01-01 (day-of-year 1), so active_days offsets shift
    # by one: [7-11] → [8-12], [35-39] → [36-40], [75-79] → [76-80].
    assert recon_days <= set(range(8, 13)), recon_days
    assert exploit_days <= set(range(36, 41)), exploit_days
    assert action_days <= set(range(76, 81)), action_days
|
||||
|
||||
|
||||
def test_slow_burn_pipeline_passes_bounds() -> None:
    """``composite_signals_clusterer`` must satisfy the slow_burn bounds,
    place every observation in a single cluster, and score an ARI of 1.0."""
    corpus = generate(load_yaml(FIXTURE_YAML), seed=0)
    metrics = assert_fixture_bounds(corpus, composite_signals_clusterer, EXPECTED_YAML)

    # Run the clusterer directly to inspect the partition itself.
    assignment = composite_signals_clusterer(corpus)
    cluster_ids = set(assignment.values())
    assert len(cluster_ids) == 1, (
        "composite_signals_clusterer should fold all three windows into one cluster"
    )
    assert metrics["adjusted_rand_index"] == pytest.approx(1.0)
|
||||
|
||||
|
||||
def test_recency_decay_clusterer_fragments_campaign() -> None:
    """
    The fixture's reason for being: a recency-decay clusterer must fail it.

    With a 14-day half-life and a 0.5 threshold, the deliberately-bad
    reference is expected to drop the edges between operational windows
    that sit 24+ days apart, splitting the campaign into three clusters
    and collapsing completeness.

    If the bad clusterer ever satisfies the completeness floor (so this
    test fails), the fixture has lost its discrimination power.
    """
    corpus = generate(load_yaml(FIXTURE_YAML), seed=0)
    assignment = recency_decay_clusterer(corpus, half_life_days=14.0, threshold=0.5)
    n_clusters = len(set(assignment.values()))
    assert n_clusters == 3, (
        f"recency-decay clusterer should split into 3 clusters, got {n_clusters}"
    )

    metrics = score(corpus.truth_labels(level="campaign"), assignment)
    assert metrics["completeness"] == pytest.approx(0.0)

    # Metric floors the bad clusterer is checked against.
    floors = {
        "adjusted_rand_index": 0.85,
        "homogeneity": 0.90,
        "completeness": 0.80,
        "singleton_recall": 0.95,
    }
    breaches = []
    for metric_name, floor in floors.items():
        if metrics[metric_name] < floor:
            breaches.append(metric_name)
    assert "completeness" in breaches, (
        f"fixture failed to catch the bad clusterer; observed metrics: {metrics}"
    )
|
||||
|
||||
|
||||
def test_recency_decay_clusterer_with_long_halflife_does_not_fragment() -> None:
    """
    Sanity check on the recency-decay reference itself.

    With a half-life (365 days) longer than the campaign, every edge is
    expected to survive the decay, so the three windows merge into one
    cluster. This confirms the fragmentation seen at a 14-day half-life
    comes from the half-life parameter and not something unrelated.
    (Half-life 365 → an edge across 40 days decays to ~0.93, well above
    the 0.5 threshold.)
    """
    corpus = generate(load_yaml(FIXTURE_YAML), seed=0)
    assignment = recency_decay_clusterer(corpus, half_life_days=365.0, threshold=0.5)
    cluster_ids = set(assignment.values())
    assert len(cluster_ids) == 1
|
||||
126
tests/clustering/test_vpn_hopping_fixture.py
Normal file
126
tests/clustering/test_vpn_hopping_fixture.py
Normal file
@@ -0,0 +1,126 @@
|
||||
"""
|
||||
End-to-end pipeline test for fixture 2 (vpn_hopping).
|
||||
|
||||
One campaign, one actor, ip_pool: rotating across 5 distinct ASNs.
|
||||
JA3, HASSH, and payload_hash stable across every rotation. The
|
||||
fixture is the canonical "same hands, different IP/ASN" scenario
|
||||
that motivates Identity Resolution (see development/
|
||||
IDENTITY_RESOLUTION.md — these are the signals "the attacker can't
|
||||
cheaply rotate"). It also stresses the clusterer's weighting of
|
||||
ASN: the real similarity graph weights ASN match "very low" because
|
||||
VPN/proxy hopping shatters ASN within a single identity.
|
||||
|
||||
Four tests cover this (a `test_vpn_hopping_corpus_shape` sanity check, plus the three listed below):
|
||||
|
||||
1. `test_vpn_hopping_pipeline_passes_bounds_at_campaign_level` —
|
||||
`fingerprint_clusterer` reference folds all 5 rotated rows into
|
||||
one cluster (shared JA3 + HASSH). Trivially green at campaign-
|
||||
level scoring; the test is a ratchet point for the real algorithm
|
||||
to keep passing once it lands.
|
||||
|
||||
2. `test_vpn_hopping_pipeline_passes_bounds_at_identity_level` —
|
||||
same clusterer, scored against the identity-level oracle. Verifies
|
||||
the factory's `truth_identity_id` plumbing across rotated rows
|
||||
(commit f6b8375) actually expresses the right ground truth: 5
|
||||
observations → 1 identity.
|
||||
|
||||
3. `test_asn_clusterer_fragments_campaign` — runs the deliberately-
|
||||
bad `asn_clusterer` reference. The 5 rotation_asns become 5
|
||||
singleton clusters → completeness collapses to ~0, ARI collapses,
|
||||
and the fixture's bound floor on completeness (0.80) rejects the
|
||||
bad clusterer. If the bad clusterer ever satisfies the bounds, the fixture has lost its
|
||||
discrimination power.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from tests.clustering.fixture_harness import (
|
||||
asn_clusterer,
|
||||
assert_fixture_bounds,
|
||||
fingerprint_clusterer,
|
||||
)
|
||||
from tests.clustering.metrics import score
|
||||
from tests.factories.campaign_factory import generate, load_yaml
|
||||
|
||||
# Campaign fixtures live under tests/fixtures/campaigns/, two levels up
# from this file; the spec and its paired bounds file sit side by side.
FIXTURE_DIR = Path(__file__).parent.parent.joinpath("fixtures", "campaigns")
FIXTURE_YAML = FIXTURE_DIR.joinpath("vpn_hopping.yaml")
EXPECTED_YAML = FIXTURE_DIR.joinpath("vpn_hopping.expected.yaml")
|
||||
|
||||
|
||||
def test_vpn_hopping_corpus_shape() -> None:
    """Sanity-check the generated vpn_hopping corpus.

    One actor with rotation_count=5 should yield five observation rows
    that share one campaign, one identity and one actor label, spread
    over five distinct ASNs and IPs, with a single JA3 and HASSH.
    """
    corpus = generate(load_yaml(FIXTURE_YAML), seed=0)
    assert len(corpus.attackers) == 5

    # Ground-truth labels: every row belongs to the same campaign/identity/actor.
    campaign_ids = {att.truth_campaign_id for att in corpus.attackers}
    identity_ids = {att.truth_identity_id for att in corpus.attackers}
    actor_ids = {att.truth_actor_id for att in corpus.attackers}
    assert campaign_ids == {"vpn-hopping-001"}
    assert len(identity_ids) == 1, "all 5 rotations must share one truth_identity_id"
    assert actor_ids == {"hopper-a"}

    # Network-level attributes differ on every rotation.
    seen_asns = {att.asn for att in corpus.attackers}
    assert seen_asns == set(range(64512, 64517))
    seen_ips = {att.ip for att in corpus.attackers}
    assert len(seen_ips) == 5, "rotation must produce 5 distinct IPs"

    # Fingerprints stay fixed across rows — the load-bearing signal.
    distinct_ja3 = {att.ja3 for att in corpus.attackers}
    distinct_hassh = {att.hassh for att in corpus.attackers}
    assert len(distinct_ja3) == 1
    assert len(distinct_hassh) == 1
|
||||
|
||||
|
||||
def test_vpn_hopping_pipeline_passes_bounds_at_campaign_level() -> None:
    """``fingerprint_clusterer`` must satisfy the vpn_hopping bounds under
    ``assert_fixture_bounds``'s default truth level."""
    corpus = generate(load_yaml(FIXTURE_YAML), seed=0)
    assert_fixture_bounds(corpus, fingerprint_clusterer, EXPECTED_YAML)
|
||||
|
||||
|
||||
def test_vpn_hopping_pipeline_passes_bounds_at_identity_level() -> None:
    """``fingerprint_clusterer`` must satisfy the vpn_hopping bounds when
    scored against identity-level truth, with perfect ARI and completeness."""
    corpus = generate(load_yaml(FIXTURE_YAML), seed=0)
    metrics = assert_fixture_bounds(
        corpus,
        fingerprint_clusterer,
        EXPECTED_YAML,
        truth_level="identity",
    )
    # Five observations, one predicted cluster, one truth identity:
    # the two partitions coincide, so both scores are exactly 1.0.
    perfect = pytest.approx(1.0)
    assert metrics["adjusted_rand_index"] == perfect
    assert metrics["completeness"] == perfect
|
||||
|
||||
|
||||
def test_asn_clusterer_fragments_campaign() -> None:
    """
    The fixture's reason for being: grouping by ASN must fail it.

    The deliberately-bad ``asn_clusterer`` is expected to shatter the
    campaign into 5 singletons, one per rotation ASN. The single true
    class is then split across 5 predicted clusters, so completeness
    drops to 0 and the 0.80 completeness floor must reject the result.

    If asn_clusterer ever satisfies the completeness floor (so this
    test fails), the fixture has lost its discrimination power.
    """
    corpus = generate(load_yaml(FIXTURE_YAML), seed=0)
    assignment = asn_clusterer(corpus)
    # One predicted cluster per distinct ASN in the rotation.
    cluster_ids = set(assignment.values())
    assert len(cluster_ids) == 5

    metrics = score(corpus.truth_labels(level="campaign"), assignment)
    # Collapsed completeness is the failure mode this fixture guards against.
    assert metrics["completeness"] == pytest.approx(0.0)
    # The partitions disagree badly, so ARI collapses as well.
    assert metrics["adjusted_rand_index"] < 0.1

    # Metric floors the bad clusterer is checked against.
    floors = {
        "adjusted_rand_index": 0.85,
        "homogeneity": 0.90,
        "completeness": 0.80,
        "singleton_recall": 0.95,
    }
    breaches = []
    for metric_name, floor in floors.items():
        if metrics[metric_name] < floor:
            breaches.append(metric_name)
    assert "completeness" in breaches, (
        f"fixture failed to catch the bad clusterer; observed metrics: {metrics}"
    )
|
||||
Reference in New Issue
Block a user