Adds the actor.active_days primitive to the campaign factory so a DSL actor can be bound to specific day indexes. Falls back to the non-paused day pool when absent (existing fixtures unchanged). Intersects with pause_windows so the campaign-wide silence still wins if both are set. Adds time_window_clusterer reference to fixture_harness — union-find over attackers, edge if their session time-ranges are within gap_days of each other. Deliberately-bad reference for fixture 4: multi-day silent stretches fragment a single campaign because the clusterer has no signal that bridges the gap. Fixture 4 (paused_campaign): one campaign modeled as two DSL actors representing the operator's two operational windows (active days 1-2 and 6-7), separated by a silent stretch (days 3-5). Both share JA3 + HASSH + payload + C2 callback; only their active_days differ. Five tests: corpus shape (rows in their windows, shared signals), pipeline pass via fingerprint_clusterer at level=campaign, adversarial fragmentation via time_window_clusterer (1-day union threshold cannot bridge the 4-day silence → completeness collapses), huge-gap sanity (gap_days=10 unions both halves), silent-stretch invariant (no session leaks into the configured pause window). Identity-level scoring is fixture 2's job; this fixture is campaign-level only — modeling caveat documented in the YAML.
236 lines
8.7 KiB
Python
236 lines
8.7 KiB
Python
"""
|
|
Shared helpers for fixture-driven clustering tests.
|
|
|
|
Each fixture lives at `tests/fixtures/campaigns/<name>.yaml` with paired
|
|
`<name>.expected.yaml` bound file. The harness here keeps every per-
|
|
fixture test file down to "load corpus → predict → assert bounds" without
|
|
copy-pasting the bound-walk loop or reference clusterers across files.
|
|
|
|
Reference clusterers are provided as the algorithm under test in each
|
|
fixture's bound assertions; their names describe the *signal* they
|
|
cluster on, not the quality of the result.
|
|
|
|
* `identity_clusterer` — every attacker is its own cluster. Trivially
|
|
passes any fixture whose ground truth is all singletons (lone_wolf,
|
|
shared_wordlist before merge, etc). Useful as a green baseline while
|
|
the real connected-components algorithm is under construction.
|
|
|
|
* `fingerprint_clusterer` — groups attackers by ``(ja3, hassh)``.
|
|
Approximates the "stable signals an attacker can't cheaply rotate"
|
|
arm of the planned similarity graph (see IDENTITY_RESOLUTION.md
|
|
Premise). Folds rotated-IP observations of one actor into one
|
|
cluster when the actor's JA3 + HASSH stay stable. Attackers whose
|
|
fingerprints are both NULL (typical of un-fingerprinted noise
|
|
scanners) are treated as un-mergeable — each becomes its own
|
|
singleton — so this clusterer doesn't trivially fuse all noise
|
|
into one mega-cluster.
|
|
|
|
* `credential_jaccard_clusterer` — deliberately-bad reference that
|
|
merges any two attackers whose credential-attempt sets overlap above
|
|
a threshold. Exists so fixtures like `shared_wordlist` can prove
|
|
they fail a clusterer that relies on credential overlap alone — the
|
|
whole point of fixture #1.
|
|
|
|
* `asn_clusterer` — deliberately-bad reference that groups attackers
|
|
by source ASN. Exists so fixtures like `vpn_hopping` (fixture #2)
|
|
can prove they fail a clusterer that treats ASN match as a
|
|
high-weight signal — VPN/proxy hopping shatters ASN within a single
|
|
identity and a clusterer that leans on it tanks completeness.
|
|
|
|
* `time_window_clusterer` — deliberately-bad reference that unions
|
|
attackers whose session time-ranges are within ``gap_days`` of each
|
|
other. Exists so fixtures like `paused_campaign` (fixture #4) can
|
|
prove they fail a clusterer that treats short-window time proximity
|
|
as a primary signal — operators pause, sleep, take weekends.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
from collections.abc import Callable
|
|
from pathlib import Path
|
|
|
|
import yaml
|
|
|
|
from tests.clustering.metrics import score
|
|
from tests.factories.campaign_factory import GeneratedCorpus
|
|
|
|
PredictFn = Callable[[GeneratedCorpus], dict[str, str]]
|
|
|
|
|
|
def assert_fixture_bounds(
|
|
corpus: GeneratedCorpus,
|
|
predict: PredictFn,
|
|
expected_path: str | Path,
|
|
*,
|
|
truth_level: str = "campaign",
|
|
) -> dict[str, float]:
|
|
"""
|
|
Run `predict` against the corpus, score against ground truth, and
|
|
assert every metric meets the floor declared in `expected_path`.
|
|
|
|
``truth_level`` selects the oracle: ``"campaign"`` (default) for
|
|
campaign-clustering fixtures, ``"identity"`` for identity-resolution
|
|
fixtures (where the clusterer's job is to fold N rotated-IP
|
|
observations into one identity), or ``"actor"`` for completeness.
|
|
|
|
Returns the observed metrics dict so callers can do additional
|
|
assertions (e.g. "homogeneity is *exactly* 1.0 for this fixture").
|
|
"""
|
|
bounds = yaml.safe_load(Path(expected_path).read_text(encoding="utf-8"))
|
|
truth = corpus.truth_labels(level=truth_level)
|
|
pred = predict(corpus)
|
|
metrics = score(truth, pred)
|
|
|
|
failures = []
|
|
for name, bound in bounds.items():
|
|
observed = metrics[name]
|
|
floor = bound["min"]
|
|
if observed < floor:
|
|
failures.append(f"{name}={observed:.3f} < min {floor:.3f}")
|
|
assert not failures, (
|
|
"fixture bounds violated: " + "; ".join(failures)
|
|
+ f" (full metrics: {metrics})"
|
|
)
|
|
return metrics
|
|
|
|
|
|
# ─── Reference clusterers ───────────────────────────────────────────────────
|
|
|
|
|
|
def identity_clusterer(corpus: GeneratedCorpus) -> dict[str, str]:
|
|
"""Every attacker → its own cluster. Placeholder until §4 algorithm lands."""
|
|
return {a.attacker_id: f"cluster-{a.attacker_id}" for a in corpus.attackers}
|
|
|
|
|
|
def fingerprint_clusterer(corpus: GeneratedCorpus) -> dict[str, str]:
|
|
"""Group by ``(ja3, hassh)``. Un-fingerprinted rows stay singleton.
|
|
|
|
Approximates the stable-signal arm of the planned similarity graph;
|
|
the real algorithm in `decnet/clustering/` will extend this with
|
|
payload simhashes, C2 callback overlap, and phase-handoff edges.
|
|
"""
|
|
pred: dict[str, str] = {}
|
|
for att in corpus.attackers:
|
|
if att.ja3 is None and att.hassh is None:
|
|
# No fingerprint to share — un-mergeable, own cluster.
|
|
pred[att.attacker_id] = f"fp-singleton-{att.attacker_id}"
|
|
else:
|
|
pred[att.attacker_id] = f"fp::{att.ja3}::{att.hassh}"
|
|
return pred
|
|
|
|
|
|
def asn_clusterer(corpus: GeneratedCorpus) -> dict[str, str]:
|
|
"""Group by source ASN. Deliberately-bad — see fixture 2."""
|
|
return {a.attacker_id: f"asn-{a.asn}" for a in corpus.attackers}
|
|
|
|
|
|
def time_window_clusterer(
|
|
corpus: GeneratedCorpus, *, gap_days: float = 1.0
|
|
) -> dict[str, str]:
|
|
"""Union-find over attackers, edge if their session time-ranges
|
|
overlap or are within ``gap_days`` of each other.
|
|
|
|
Deliberately-bad reference for fixture 4 (paused_campaign): a
|
|
campaign that goes silent for several days will be split into
|
|
"before pause" and "after pause" clusters by this clusterer,
|
|
breaching completeness. The real algorithm must not lean on
|
|
short-window time proximity as a primary signal — operators
|
|
pause, sleep, switch shifts, take weekends. Time bursts are a
|
|
weak hint, not a hard partition.
|
|
|
|
Attackers with no sessions become their own singleton cluster.
|
|
"""
|
|
from datetime import timedelta
|
|
|
|
gap = timedelta(days=gap_days)
|
|
ids = [a.attacker_id for a in corpus.attackers]
|
|
ranges: dict[str, tuple] = {}
|
|
for att in corpus.attackers:
|
|
if not att.sessions:
|
|
continue
|
|
starts = [s.started_at for s in att.sessions]
|
|
ends = [s.started_at + timedelta(seconds=s.duration_s) for s in att.sessions]
|
|
ranges[att.attacker_id] = (min(starts), max(ends))
|
|
|
|
parent: dict[str, str] = {aid: aid for aid in ids}
|
|
|
|
def find(x: str) -> str:
|
|
while parent[x] != x:
|
|
parent[x] = parent[parent[x]]
|
|
x = parent[x]
|
|
return x
|
|
|
|
def union(x: str, y: str) -> None:
|
|
rx, ry = find(x), find(y)
|
|
if rx != ry:
|
|
parent[rx] = ry
|
|
|
|
keys = list(ranges.keys())
|
|
for i, a in enumerate(keys):
|
|
a_start, a_end = ranges[a]
|
|
for b in keys[i + 1 :]:
|
|
b_start, b_end = ranges[b]
|
|
# Time-distance between the two ranges (0 if they overlap).
|
|
if a_end < b_start:
|
|
separation = b_start - a_end
|
|
elif b_end < a_start:
|
|
separation = a_start - b_end
|
|
else:
|
|
separation = timedelta(0)
|
|
if separation <= gap:
|
|
union(a, b)
|
|
|
|
return {aid: find(aid) for aid in ids}
|
|
|
|
|
|
def credential_jaccard_clusterer(
|
|
corpus: GeneratedCorpus, *, threshold: float = 0.5
|
|
) -> dict[str, str]:
|
|
"""
|
|
Deliberately-bad reference: union-find over attackers, edge whenever
|
|
two attackers' credential-attempt sets have Jaccard ≥ threshold.
|
|
|
|
Used to demonstrate that fixtures targeting credential-overlap
|
|
failure modes (fixture 1: shared_wordlist) actually catch a clusterer
|
|
that leans on credential signals alone. NOT the real algorithm.
|
|
"""
|
|
# Build per-attacker credential sets.
|
|
creds: dict[str, set[tuple[str, str]]] = {}
|
|
for att in corpus.attackers:
|
|
s: set[tuple[str, str]] = set()
|
|
for sess in att.sessions:
|
|
s.update(sess.credentials_tried)
|
|
creds[att.attacker_id] = s
|
|
|
|
# Union-find.
|
|
parent: dict[str, str] = {aid: aid for aid in creds}
|
|
|
|
def find(x: str) -> str:
|
|
while parent[x] != x:
|
|
parent[x] = parent[parent[x]]
|
|
x = parent[x]
|
|
return x
|
|
|
|
def union(x: str, y: str) -> None:
|
|
rx, ry = find(x), find(y)
|
|
if rx != ry:
|
|
parent[rx] = ry
|
|
|
|
ids = list(creds.keys())
|
|
for i, a in enumerate(ids):
|
|
sa = creds[a]
|
|
if not sa:
|
|
continue
|
|
for b in ids[i + 1 :]:
|
|
sb = creds[b]
|
|
if not sb:
|
|
continue
|
|
inter = len(sa & sb)
|
|
union_size = len(sa | sb)
|
|
if union_size == 0:
|
|
continue
|
|
jaccard = inter / union_size
|
|
if jaccard >= threshold:
|
|
union(a, b)
|
|
|
|
return {aid: find(aid) for aid in ids}
|