merge: testing → main (reconcile 2-week divergence)
This commit is contained in:
17
tests/fixtures/campaigns/lone_wolf.expected.yaml
vendored
Normal file
17
tests/fixtures/campaigns/lone_wolf.expected.yaml
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
# Bounds for fixture 3 (lone_wolf).
|
||||
#
|
||||
# Every actor in this fixture is a singleton (the wolf itself, plus
|
||||
# every background-noise scanner). A correct clusterer puts each in
|
||||
# its own cluster; that's a perfect score.
|
||||
#
|
||||
# Bounds are pinned at the 1.0 ceiling and should stay there as the
|
||||
# algorithm matures. Loosening any bound to make CI pass requires
|
||||
# justification in the PR description (per CAMPAIGN_CLUSTERING.md §2).
|
||||
adjusted_rand_index:
|
||||
min: 1.0
|
||||
homogeneity:
|
||||
min: 1.0
|
||||
completeness:
|
||||
min: 1.0
|
||||
singleton_recall:
|
||||
min: 1.0
|
||||
32
tests/fixtures/campaigns/lone_wolf.yaml
vendored
Normal file
32
tests/fixtures/campaigns/lone_wolf.yaml
vendored
Normal file
@@ -0,0 +1,32 @@
|
||||
# Fixture 3 (lone_wolf) — see development/CAMPAIGN_CLUSTERING.md §2.
|
||||
#
|
||||
# One opportunistic scanner, Delivery phase only, no follow-up, no shared
|
||||
# signals with anyone else. Surrounded by background noise. The clusterer
|
||||
# must keep the wolf and every noise scanner as their own singleton —
|
||||
# none should be absorbed into anyone else.
|
||||
#
|
||||
# This is the simplest of the six fixtures and exists primarily to prove
|
||||
# the end-to-end pipeline (DSL → factory → clusterer → metrics) before
|
||||
# we invest in the harder scenarios.
|
||||
corpus:
|
||||
campaigns:
|
||||
- campaign:
|
||||
id: lone-wolf-001
|
||||
actors:
|
||||
- id: wolf-a
|
||||
asn: 14061
|
||||
ip_pool: sticky
|
||||
ja3: null
|
||||
hassh: null
|
||||
hours_active_utc: [3, 4, 5]
|
||||
jitter_seconds: 30
|
||||
phases:
|
||||
- name: delivery
|
||||
actor: wolf-a
|
||||
target_selector:
|
||||
service: any
|
||||
count: 1
|
||||
dwell_seconds: 1
|
||||
duration_days: 1
|
||||
noise:
|
||||
scanner_count: 8
|
||||
25
tests/fixtures/campaigns/multi_operator.expected.yaml
vendored
Normal file
25
tests/fixtures/campaigns/multi_operator.expected.yaml
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
# Bounds for fixture 5 (multi_operator).
|
||||
#
|
||||
# Ground truth at campaign-level: 1 campaign of 2 observation rows
|
||||
# (one per DSL actor). A correct algorithm scores 1.0 across every
|
||||
# metric on this fixture.
|
||||
#
|
||||
# Completeness is the load-bearing metric: a clusterer that splits
|
||||
# the two operators by shift / by tooling / by ASN tanks
|
||||
# completeness (the one true class is split across two predicted
|
||||
# clusters). The adversarial shift_clusterer demonstrates this and
|
||||
# the bound below rejects it.
|
||||
#
|
||||
# Campaign-level fixture only — the two DSL actors model two
|
||||
# distinct identities (different tooling, different operators) by
|
||||
# design. See the YAML header for the modeling note.
|
||||
#
|
||||
# Bounds are already at the 1.0 ceiling; there is nothing left to tighten.
|
||||
adjusted_rand_index:
|
||||
min: 1.0
|
||||
homogeneity:
|
||||
min: 1.0
|
||||
completeness:
|
||||
min: 1.0
|
||||
singleton_recall:
|
||||
min: 1.0
|
||||
108
tests/fixtures/campaigns/multi_operator.yaml
vendored
Normal file
108
tests/fixtures/campaigns/multi_operator.yaml
vendored
Normal file
@@ -0,0 +1,108 @@
|
||||
# Fixture 5 (multi_operator) — see development/CAMPAIGN_CLUSTERING.md §2.
|
||||
#
|
||||
# One campaign, two operators with distinct UKC roles. Phase-handoff is
|
||||
# the load-bearing signal; this fixture is what proves the algorithm
|
||||
# needs it.
|
||||
#
|
||||
# Actor A (night shift, hours 22-03 UTC):
|
||||
# Delivery → Exploitation → Persistence → Command-and-Control
|
||||
#
|
||||
# Actor B (day shift, hours 09-13 UTC):
|
||||
# Discovery → Lateral Movement → Collection → Exfiltration
|
||||
#
|
||||
# Different IPs, different ASNs, different JA3+HASSH (different
|
||||
# tools — A is the access broker, B is the post-exploitation
|
||||
# operator). What ties them is shared C2 callback and shared
|
||||
# stage-1 payload hash.
|
||||
#
|
||||
# Pass condition: a clusterer that resolves on shared C2 callback
|
||||
# (or, more generally, the planned similarity graph's payload +
|
||||
# C2 + phase-handoff signals) folds the two actors into one
|
||||
# campaign cluster. Demonstrated by `c2_callback_clusterer`.
|
||||
#
|
||||
# Adversarial condition: `shift_clusterer` (group attackers by
|
||||
# majority shift bucket — night/day/swing) puts A in "night" and B
|
||||
# in "day", fragmenting the campaign. Completeness collapses; the
|
||||
# bound floor on completeness rejects the bad clusterer. This is
|
||||
# the canonical demonstration that operational-schedule overlap is
|
||||
# NOT a campaign signal — different operators on different shifts
|
||||
# can still be one campaign.
|
||||
#
|
||||
# Like fixture 4, this is a CAMPAIGN-LEVEL fixture only. The two
|
||||
# DSL actors mint two distinct truth_identity_id rows by design
|
||||
# (different operators, different tools — they are different
|
||||
# identities even though they're one campaign). Identity-level
|
||||
# scoring is fixture 2's job.
|
||||
campaign:
|
||||
id: multi-operator-001
|
||||
duration_days: 3
|
||||
actors:
|
||||
- id: ops-broker-night
|
||||
asn: 64530
|
||||
ip_pool: sticky
|
||||
# Tool A's TLS stack — older OpenSSL signature.
|
||||
ja3: "771,49195-49199-49196-49200-156-157-47-53,0-23-65281-10-11-35-16-5-13-18-51-45-43-27,29-23-24,0"
|
||||
hassh: "ops-broker-eeeeeeee-eeeeeeee-eeeeeeee"
|
||||
hours_active_utc: [22, 23, 0, 1, 2, 3]
|
||||
jitter_seconds: 60
|
||||
- id: ops-postex-day
|
||||
asn: 64531
|
||||
ip_pool: sticky
|
||||
# Tool B's TLS stack — distinctly different from A.
|
||||
ja3: "769,49162-49161-49171-49172-51-50-47,0-10-11-13-23-65281,29-23-24-25,0"
|
||||
hassh: "ops-postex-ffffffff-ffffffff-ffffffff"
|
||||
hours_active_utc: [9, 10, 11, 12, 13]
|
||||
jitter_seconds: 60
|
||||
phases:
|
||||
# Actor A — initial access path, owns the foothold.
|
||||
- name: delivery
|
||||
actor: ops-broker-night
|
||||
tool_signature:
|
||||
c2_callback: "c2.shared-op.example"
|
||||
target_selector: { service: ssh, count: 2 }
|
||||
dwell_seconds: 1
|
||||
- name: exploitation
|
||||
actor: ops-broker-night
|
||||
tool_signature:
|
||||
payload_hash: "shared-op-stage1-payload"
|
||||
c2_callback: "c2.shared-op.example"
|
||||
target_selector: { service: ssh, count: 2 }
|
||||
dwell_seconds: 5
|
||||
- name: persistence
|
||||
actor: ops-broker-night
|
||||
tool_signature:
|
||||
c2_callback: "c2.shared-op.example"
|
||||
target_selector: { decky: previous_success, count: 1 }
|
||||
dwell_seconds: 5
|
||||
- name: command_and_control
|
||||
actor: ops-broker-night
|
||||
tool_signature:
|
||||
c2_callback: "c2.shared-op.example"
|
||||
target_selector: { decky: previous_success, count: 1 }
|
||||
dwell_seconds: 5
|
||||
# Actor B — picks up after A's foothold; shares C2 + payload.
|
||||
- name: discovery
|
||||
actor: ops-postex-day
|
||||
tool_signature:
|
||||
c2_callback: "c2.shared-op.example"
|
||||
target_selector: { decky: previous_success, count: 2 }
|
||||
dwell_seconds: 5
|
||||
- name: lateral_movement
|
||||
actor: ops-postex-day
|
||||
tool_signature:
|
||||
c2_callback: "c2.shared-op.example"
|
||||
target_selector: { service: ssh, count: 2 }
|
||||
dwell_seconds: 5
|
||||
- name: collection
|
||||
actor: ops-postex-day
|
||||
tool_signature:
|
||||
payload_hash: "shared-op-stage1-payload"
|
||||
c2_callback: "c2.shared-op.example"
|
||||
target_selector: { service: ssh, count: 2 }
|
||||
dwell_seconds: 5
|
||||
- name: exfiltration
|
||||
actor: ops-postex-day
|
||||
tool_signature:
|
||||
c2_callback: "c2.shared-op.example"
|
||||
target_selector: { service: ssh, count: 2 }
|
||||
dwell_seconds: 5
|
||||
24
tests/fixtures/campaigns/noise_floor.expected.yaml
vendored
Normal file
24
tests/fixtures/campaigns/noise_floor.expected.yaml
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
# Bounds for fixture 6 (noise_floor).
|
||||
#
|
||||
# Composite corpus: ~14 campaign-driven attackers (across 5 prior
|
||||
# fixtures' actors / rotations) + 18 truth-singleton noise rows
|
||||
# (8 inherited from lone_wolf + 10 added by this fixture).
|
||||
#
|
||||
# A correct algorithm groups every campaign correctly and leaves
|
||||
# every singleton singleton — score 1.0 across every metric.
|
||||
#
|
||||
# Singleton recall is the load-bearing metric here: noise
|
||||
# absorption is the failure mode that makes campaign attribution
|
||||
# useless in practice (a clusterer that pulls noise into real
|
||||
# campaigns dilutes attribution to nothing). The bound floor on
|
||||
# singleton_recall is what would catch that regression.
|
||||
#
|
||||
# Bounds are already at the 1.0 ceiling; there is nothing left to tighten.
|
||||
adjusted_rand_index:
|
||||
min: 1.0
|
||||
homogeneity:
|
||||
min: 1.0
|
||||
completeness:
|
||||
min: 1.0
|
||||
singleton_recall:
|
||||
min: 1.0
|
||||
34
tests/fixtures/campaigns/noise_floor.yaml
vendored
Normal file
34
tests/fixtures/campaigns/noise_floor.yaml
vendored
Normal file
@@ -0,0 +1,34 @@
|
||||
# Fixture 6 (noise_floor) — see development/CAMPAIGN_CLUSTERING.md §2.
|
||||
#
|
||||
# Composite of all five prior fixtures plus N additional Delivery-only
|
||||
# noise scanners. Tests that the clusterer holds every campaign
|
||||
# together AND keeps every noise observation as its own singleton
|
||||
# in the presence of cross-fixture interference.
|
||||
#
|
||||
# This is a CONTROL fixture: if any of the five constituent fixtures
|
||||
# regresses inside the combined corpus but passes individually, the
|
||||
# regression is in cross-corpus interaction (signal collisions,
|
||||
# clusterer ambiguity, factory ID re-use, …). Catches a class of bugs
|
||||
# that single-corpus fixtures cannot.
|
||||
#
|
||||
# Composition is expressed by listing the constituent fixtures here;
|
||||
# the test file's loader expands this into a full `corpus.campaigns`
|
||||
# spec at load time so the factory itself stays unaware. This format
|
||||
# is fixture-6-specific — no other fixture uses ``include_fixtures``.
|
||||
#
|
||||
# Pass condition: ``composite_signals_clusterer`` (fingerprint OR C2
|
||||
# union-find — see fixture_harness.py) resolves every campaign and
|
||||
# leaves every noise scanner singleton. The bounds in
|
||||
# ``noise_floor.expected.yaml`` are global across the merged corpus.
|
||||
#
|
||||
# No standalone adversarial test for fixture 6 — each constituent
|
||||
# fixture already ships its own. What this fixture catches is
|
||||
# CROSS-fixture interference, which is a different failure mode from
|
||||
# the in-fixture adversarial cases.
|
||||
include_fixtures:
|
||||
- shared_wordlist.yaml
|
||||
- vpn_hopping.yaml
|
||||
- lone_wolf.yaml
|
||||
- paused_campaign.yaml
|
||||
- multi_operator.yaml
|
||||
extra_noise_scanners: 10
|
||||
24
tests/fixtures/campaigns/paused_campaign.expected.yaml
vendored
Normal file
24
tests/fixtures/campaigns/paused_campaign.expected.yaml
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
# Bounds for fixture 4 (paused_campaign).
|
||||
#
|
||||
# Ground truth at campaign-level: 1 campaign of 2 observation rows
|
||||
# (one per DSL actor — modeling the operator's two operational
|
||||
# windows). A correct algorithm scores 1.0 on every metric.
|
||||
#
|
||||
# Completeness is the load-bearing metric: a clusterer that lets a
|
||||
# multi-day silent period split the campaign tanks completeness
|
||||
# (the one true class is split across two predicted clusters,
|
||||
# matching the gap). The adversarial time_window_clusterer
|
||||
# demonstrates this and the bound below rejects it.
|
||||
#
|
||||
# This fixture is CAMPAIGN-LEVEL ONLY (see the fixture YAML for
|
||||
# why). No identity-level scoring.
|
||||
#
|
||||
# Bounds are already at the 1.0 ceiling; there is nothing left to tighten.
|
||||
adjusted_rand_index:
|
||||
min: 1.0
|
||||
homogeneity:
|
||||
min: 1.0
|
||||
completeness:
|
||||
min: 1.0
|
||||
singleton_recall:
|
||||
min: 1.0
|
||||
85
tests/fixtures/campaigns/paused_campaign.yaml
vendored
Normal file
85
tests/fixtures/campaigns/paused_campaign.yaml
vendored
Normal file
@@ -0,0 +1,85 @@
|
||||
# Fixture 4 (paused_campaign) — see development/CAMPAIGN_CLUSTERING.md §2.
|
||||
#
|
||||
# One campaign that operates in two sprints with a multi-day silence
|
||||
# between them:
|
||||
#
|
||||
# active days 1-2 (0-indexed [0, 1]) — Delivery, Exploitation
|
||||
# silent days 3-5 (0-indexed [2, 3, 4]) — pause window
|
||||
# active days 6-7 (0-indexed [5, 6]) — Discovery, Lateral Movement,
|
||||
# Exfiltration
|
||||
#
|
||||
# Modeled as TWO DSL actors representing the same operator's two
|
||||
# operational windows. Both share JA3, HASSH, payload, and C2
|
||||
# callback — the stable signals a fingerprint-driven clusterer
|
||||
# resolves on. Their ``active_days`` differ so each operator-half
|
||||
# emits sessions in disjoint time ranges, which is what makes the
|
||||
# adversarial time-window clusterer fragment the campaign.
|
||||
#
|
||||
# Two-actor modeling caveat: the factory mints a separate
|
||||
# ``truth_identity_id`` per DSL actor by design (see
|
||||
# IDENTITY_RESOLUTION.md — identities are recovered from signals, not
|
||||
# declared in the DSL). This is a CAMPAIGN-LEVEL fixture only;
|
||||
# identity-level scoring is fixture 2's job. The bound floors below
|
||||
# apply at level=campaign.
|
||||
#
|
||||
# Pass condition: a fingerprint-driven clusterer must fold both
|
||||
# operational windows into one cluster (shared JA3 + HASSH +
|
||||
# payload). A clusterer that lets a multi-day quiet period split
|
||||
# the campaign fails the completeness floor.
|
||||
#
|
||||
# Adversarial condition: ``time_window_clusterer`` (union sessions
|
||||
# within 1 day of each other) is unable to bridge the 3-day silent
|
||||
# stretch and splits the campaign into "before pause" and "after
|
||||
# pause" clusters. Completeness collapses; the bound floor rejects
|
||||
# this clusterer.
|
||||
campaign:
|
||||
id: paused-campaign-001
|
||||
duration_days: 7
|
||||
pause_windows:
|
||||
- [2, 4] # 0-indexed, inclusive: campaign-wide silence on days 3-5 (1-indexed)
|
||||
actors:
|
||||
- id: ops-sprint-1
|
||||
asn: 64520
|
||||
ip_pool: sticky
|
||||
ja3: "771,4865-4867-49195-49199-49196-49200-157,0-23-65281-10-11-35-16-5-13-18-51-45-43-27,29-24,0"
|
||||
hassh: "paused-op-dddddddd-dddddddd-dddddddd"
|
||||
hours_active_utc: [9, 10, 11, 12, 13, 14, 15, 16]
|
||||
jitter_seconds: 60
|
||||
active_days: [0, 1]
|
||||
- id: ops-sprint-2
|
||||
asn: 64520 # same ASN — operator stays on same egress
|
||||
ip_pool: sticky
|
||||
ja3: "771,4865-4867-49195-49199-49196-49200-157,0-23-65281-10-11-35-16-5-13-18-51-45-43-27,29-24,0"
|
||||
hassh: "paused-op-dddddddd-dddddddd-dddddddd"
|
||||
hours_active_utc: [9, 10, 11, 12, 13, 14, 15, 16]
|
||||
jitter_seconds: 60
|
||||
active_days: [5, 6]
|
||||
phases:
|
||||
- name: delivery
|
||||
actor: ops-sprint-1
|
||||
target_selector: { service: ssh, count: 2 }
|
||||
dwell_seconds: 1
|
||||
- name: exploitation
|
||||
actor: ops-sprint-1
|
||||
tool_signature:
|
||||
payload_hash: "paused-op-stage1-payload"
|
||||
c2_callback: "c2.paused-op.example"
|
||||
target_selector: { service: ssh, count: 2 }
|
||||
dwell_seconds: 5
|
||||
- name: discovery
|
||||
actor: ops-sprint-2
|
||||
target_selector: { service: ssh, count: 2 }
|
||||
dwell_seconds: 5
|
||||
- name: lateral_movement
|
||||
actor: ops-sprint-2
|
||||
tool_signature:
|
||||
payload_hash: "paused-op-stage1-payload"
|
||||
c2_callback: "c2.paused-op.example"
|
||||
target_selector: { service: ssh, count: 2 }
|
||||
dwell_seconds: 5
|
||||
- name: exfiltration
|
||||
actor: ops-sprint-2
|
||||
tool_signature:
|
||||
c2_callback: "c2.paused-op.example"
|
||||
target_selector: { service: ssh, count: 2 }
|
||||
dwell_seconds: 5
|
||||
21
tests/fixtures/campaigns/shared_wordlist.expected.yaml
vendored
Normal file
21
tests/fixtures/campaigns/shared_wordlist.expected.yaml
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
# Bounds for fixture 1 (shared_wordlist).
|
||||
#
|
||||
# Ground truth: two distinct campaigns, one actor each → 2 truth-labels
|
||||
# of size 1. The clusterer must keep them separate. A correct algorithm
|
||||
# scores 1.0 across every metric on this fixture.
|
||||
#
|
||||
# Homogeneity is the load-bearing metric here: a clusterer that merges
|
||||
# the two campaigns based on shared credentials will tank homogeneity
|
||||
# (one predicted cluster contains members of two true campaigns).
|
||||
#
|
||||
# Bounds are already at the 1.0 ceiling for every metric. Loosening
|
||||
# any bound to make CI pass requires PR-comment justification (per
|
||||
# CAMPAIGN_CLUSTERING.md §2).
|
||||
adjusted_rand_index:
|
||||
min: 1.0
|
||||
homogeneity:
|
||||
min: 1.0
|
||||
completeness:
|
||||
min: 1.0
|
||||
singleton_recall:
|
||||
min: 1.0
|
||||
84
tests/fixtures/campaigns/shared_wordlist.yaml
vendored
Normal file
84
tests/fixtures/campaigns/shared_wordlist.yaml
vendored
Normal file
@@ -0,0 +1,84 @@
|
||||
# Fixture 1 (shared_wordlist) — see development/CAMPAIGN_CLUSTERING.md §2.
|
||||
#
|
||||
# Two distinct campaigns, both bruteforcing SSH with the SAME credential
|
||||
# wordlist (rockyou-top1k flavor). EVERYTHING ELSE diverges:
|
||||
# - different ASNs (DigitalOcean vs Comcast residential)
|
||||
# - different IP ranges (sticky pools, generated separately)
|
||||
# - different JA3 / HASSH (different SSH client toolchains)
|
||||
# - different active hours (UTC-day vs UTC-night)
|
||||
#
|
||||
# Pass condition: the clusterer must NOT merge these into one campaign.
|
||||
# Credential overlap alone is not enough signal — commodity wordlists are
|
||||
# shared by hundreds of unrelated actors. A clusterer that leans on
|
||||
# credential-list Jaccard alone will fail this fixture (we prove this in
|
||||
# the test file with a deliberately-bad credential-Jaccard reference
|
||||
# clusterer).
|
||||
corpus:
|
||||
campaigns:
|
||||
- campaign:
|
||||
id: shared-wordlist-A
|
||||
actors:
|
||||
- id: actor-A
|
||||
asn: 14061 # DigitalOcean
|
||||
ip_pool: sticky
|
||||
ja3: "771,4865-4866-4867-49195-49199-49196-49200,0-23-65281-10-11-35-16-5-13-18-51-45-43-27-17513,29-23-24,0"
|
||||
hassh: "alpha-aaaaaaaa-aaaaaaaa-aaaaaaaa"
|
||||
hours_active_utc: [10, 11, 12, 13, 14]
|
||||
jitter_seconds: 60
|
||||
phases:
|
||||
- name: delivery
|
||||
actor: actor-A
|
||||
target_selector: { service: ssh, count: 1 }
|
||||
dwell_seconds: 1
|
||||
- name: credential_access
|
||||
actor: actor-A
|
||||
tool_signature:
|
||||
commands: []
|
||||
credentials:
|
||||
- [admin, admin]
|
||||
- [admin, password]
|
||||
- [admin, "12345"]
|
||||
- [root, root]
|
||||
- [root, toor]
|
||||
- [root, "123456"]
|
||||
- [user, user]
|
||||
- [test, test]
|
||||
target_selector: { service: ssh, count: 3 }
|
||||
dwell_seconds: 5
|
||||
duration_days: 1
|
||||
|
||||
- campaign:
|
||||
id: shared-wordlist-B
|
||||
actors:
|
||||
- id: actor-B
|
||||
asn: 7922 # Comcast residential
|
||||
ip_pool: sticky
|
||||
ja3: "769,49195-49199-156-49162-49161-49171-49172-51-50-47,0-10-11-13-23-65281,29-23-24-25,0"
|
||||
hassh: "beta-bbbbbbbb-bbbbbbbb-bbbbbbbb"
|
||||
hours_active_utc: [22, 23, 0, 1, 2]
|
||||
jitter_seconds: 60
|
||||
phases:
|
||||
- name: delivery
|
||||
actor: actor-B
|
||||
target_selector: { service: ssh, count: 1 }
|
||||
dwell_seconds: 1
|
||||
- name: credential_access
|
||||
actor: actor-B
|
||||
tool_signature:
|
||||
commands: []
|
||||
# IDENTICAL wordlist to campaign A — this is the trap.
|
||||
credentials:
|
||||
- [admin, admin]
|
||||
- [admin, password]
|
||||
- [admin, "12345"]
|
||||
- [root, root]
|
||||
- [root, toor]
|
||||
- [root, "123456"]
|
||||
- [user, user]
|
||||
- [test, test]
|
||||
target_selector: { service: ssh, count: 3 }
|
||||
dwell_seconds: 5
|
||||
duration_days: 1
|
||||
|
||||
noise:
|
||||
scanner_count: 0
|
||||
24
tests/fixtures/campaigns/slow_burn.expected.yaml
vendored
Normal file
24
tests/fixtures/campaigns/slow_burn.expected.yaml
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
# Bounds for fixture 7 (slow_burn).
|
||||
#
|
||||
# Ground truth at campaign-level: 1 campaign of 3 observation rows
|
||||
# (one per operational window — recon, exploit, action). A correct
|
||||
# algorithm scores 1.0 across every metric on this fixture.
|
||||
#
|
||||
# Completeness is the load-bearing metric: a clusterer that lets
|
||||
# multi-week silence fragment the campaign tanks completeness (the
|
||||
# one true class is split across the operational windows). The
|
||||
# adversarial recency_decay_clusterer demonstrates this and the
|
||||
# bound below rejects it.
|
||||
#
|
||||
# Campaign-level fixture only — the three DSL actors model the
|
||||
# operator's three operational windows by design.
|
||||
#
|
||||
# Bounds are already at the 1.0 ceiling; there is nothing left to tighten.
|
||||
adjusted_rand_index:
|
||||
min: 1.0
|
||||
homogeneity:
|
||||
min: 1.0
|
||||
completeness:
|
||||
min: 1.0
|
||||
singleton_recall:
|
||||
min: 1.0
|
||||
119
tests/fixtures/campaigns/slow_burn.yaml
vendored
Normal file
119
tests/fixtures/campaigns/slow_burn.yaml
vendored
Normal file
@@ -0,0 +1,119 @@
|
||||
# Fixture 7 (slow_burn) — see development/CAMPAIGN_CLUSTERING.md §2.
|
||||
#
|
||||
# Multi-month APT campaign. The unique signal this fixture stresses
|
||||
# is OPERATIONAL TEMPO: APTs (real ones, not skiddies) take their
|
||||
# time. Recon over weeks, exploitation later, action-on-objectives
|
||||
# later still. Long stretches of true silence between phases.
|
||||
# This is not an adversary whose activity compresses into three days.
|
||||
#
|
||||
# A MazeNET-style deep nested topology (DECNET's recursive DAG mode)
|
||||
# is exactly what an APT operator burns weeks against — mapping
|
||||
# decoy networks, working out which subnet looks productive, only
|
||||
# then committing to exploitation. This fixture encodes that tempo
|
||||
# as a 90-day campaign with three operational windows:
|
||||
#
|
||||
# week 2 (days 7-11) Delivery, Discovery
|
||||
# month 2 (days 35-39) Exploitation, Persistence
|
||||
# month 3 (days 75-79) Lateral Movement, Collection, Exfiltration
|
||||
#
|
||||
# Modeled as three DSL actors representing the same operator's three
|
||||
# operational phases (same modeling caveat as fixtures 4 and 5: the
|
||||
# factory mints a separate truth_identity_id per DSL actor; this is
|
||||
# a CAMPAIGN-LEVEL fixture only). All three share JA3 + HASSH +
|
||||
# C2 callback (plus payload from month 2 on) — the toolchain stays stable
|
||||
# across the campaign.
|
||||
#
|
||||
# Pass condition: composite_signals_clusterer (fingerprint OR C2)
|
||||
# folds all three windows into one cluster regardless of when they
|
||||
# happened. Time-agnostic edge construction is what makes this work.
|
||||
#
|
||||
# Adversarial condition: recency_decay_clusterer with a 14-day
|
||||
# e-folding time and a 0.5 weight threshold cannot bridge the multi-week
|
||||
# silences. Edges between week-2 and month-2 (≥24 days) decay to
|
||||
# ~exp(-24/14) ≈ 0.18 < 0.5 → dropped. Edges between month-2 and
|
||||
# month-3 (≥36 days) decay to ~exp(-36/14) ≈ 0.075 → dropped. The
|
||||
# campaign fragments into three clusters; completeness collapses.
|
||||
#
|
||||
# This is the canonical production failure mode for graph-based
|
||||
# clusterers that silently expire old edges to bound memory or
|
||||
# bias toward "what's hot." Catching it in synthetic data is what
|
||||
# this fixture exists for.
|
||||
campaign:
|
||||
id: slow-burn-001
|
||||
duration_days: 90
|
||||
actors:
|
||||
- id: ops-recon
|
||||
asn: 64540
|
||||
ip_pool: sticky
|
||||
ja3: "771,4865-4866-4867-49195-49199-49196-49200-156-157-47-53,0-23-65281-10-11-35-16-5-13-18-51-45-43-27-17513,29-23-24,0"
|
||||
hassh: "slow-burn-gggggggg-gggggggg-gggggggg"
|
||||
hours_active_utc: [3, 4, 5]
|
||||
jitter_seconds: 60
|
||||
active_days: [7, 8, 9, 10, 11]
|
||||
- id: ops-exploit
|
||||
asn: 64541
|
||||
ip_pool: sticky
|
||||
ja3: "771,4865-4866-4867-49195-49199-49196-49200-156-157-47-53,0-23-65281-10-11-35-16-5-13-18-51-45-43-27-17513,29-23-24,0"
|
||||
hassh: "slow-burn-gggggggg-gggggggg-gggggggg"
|
||||
hours_active_utc: [3, 4, 5]
|
||||
jitter_seconds: 60
|
||||
active_days: [35, 36, 37, 38, 39]
|
||||
- id: ops-action
|
||||
asn: 64542
|
||||
ip_pool: sticky
|
||||
ja3: "771,4865-4866-4867-49195-49199-49196-49200-156-157-47-53,0-23-65281-10-11-35-16-5-13-18-51-45-43-27-17513,29-23-24,0"
|
||||
hassh: "slow-burn-gggggggg-gggggggg-gggggggg"
|
||||
hours_active_utc: [3, 4, 5]
|
||||
jitter_seconds: 60
|
||||
active_days: [75, 76, 77, 78, 79]
|
||||
phases:
|
||||
# Week 2 — recon window. Delivery probes, discovery against the
|
||||
# MazeNET surface to identify productive subnets.
|
||||
- name: delivery
|
||||
actor: ops-recon
|
||||
tool_signature:
|
||||
c2_callback: "c2.slow-burn.example"
|
||||
target_selector: { service: any, count: 3 }
|
||||
dwell_seconds: 1
|
||||
- name: discovery
|
||||
actor: ops-recon
|
||||
tool_signature:
|
||||
c2_callback: "c2.slow-burn.example"
|
||||
target_selector: { service: any, count: 3 }
|
||||
dwell_seconds: 5
|
||||
# Month 2 — exploitation. Operator commits to one of the
|
||||
# productive subnets identified during recon.
|
||||
- name: exploitation
|
||||
actor: ops-exploit
|
||||
tool_signature:
|
||||
payload_hash: "slow-burn-stage1-payload"
|
||||
c2_callback: "c2.slow-burn.example"
|
||||
target_selector: { service: ssh, count: 3 }
|
||||
dwell_seconds: 10
|
||||
- name: persistence
|
||||
actor: ops-exploit
|
||||
tool_signature:
|
||||
c2_callback: "c2.slow-burn.example"
|
||||
target_selector: { decky: previous_success, count: 2 }
|
||||
dwell_seconds: 10
|
||||
# Month 3 — actions on objectives. Lateral movement, collection,
|
||||
# exfil — only after the operator has confidence in the foothold.
|
||||
- name: lateral_movement
|
||||
actor: ops-action
|
||||
tool_signature:
|
||||
c2_callback: "c2.slow-burn.example"
|
||||
target_selector: { service: ssh, count: 3 }
|
||||
dwell_seconds: 10
|
||||
- name: collection
|
||||
actor: ops-action
|
||||
tool_signature:
|
||||
payload_hash: "slow-burn-stage1-payload"
|
||||
c2_callback: "c2.slow-burn.example"
|
||||
target_selector: { service: ssh, count: 2 }
|
||||
dwell_seconds: 10
|
||||
- name: exfiltration
|
||||
actor: ops-action
|
||||
tool_signature:
|
||||
c2_callback: "c2.slow-burn.example"
|
||||
target_selector: { service: ssh, count: 2 }
|
||||
dwell_seconds: 10
|
||||
25
tests/fixtures/campaigns/vpn_hopping.expected.yaml
vendored
Normal file
25
tests/fixtures/campaigns/vpn_hopping.expected.yaml
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
# Bounds for fixture 2 (vpn_hopping).
|
||||
#
|
||||
# Ground truth at campaign-level: 1 campaign of 5 observation rows.
|
||||
# Ground truth at identity-level: 1 identity of 5 observation rows.
|
||||
# A correct algorithm scores 1.0 across every metric on this fixture.
|
||||
#
|
||||
# Completeness is the load-bearing metric: a clusterer that fragments
|
||||
# the campaign by IP/ASN tanks completeness (the one true class is
|
||||
# split across many predicted clusters). The adversarial asn_clusterer
|
||||
# in the test file demonstrates this and the bound below rejects it.
|
||||
#
|
||||
# No true singletons in this fixture — singleton_recall is trivially
|
||||
# 1.0 (the metric returns 1.0 when truth has no singletons).
|
||||
#
|
||||
# Bounds are already at the 1.0 ceiling for every metric. Loosening
|
||||
# any bound to make CI pass requires PR-comment justification (per
|
||||
# CAMPAIGN_CLUSTERING.md §2).
|
||||
adjusted_rand_index:
|
||||
min: 1.0
|
||||
homogeneity:
|
||||
min: 1.0
|
||||
completeness:
|
||||
min: 1.0
|
||||
singleton_recall:
|
||||
min: 1.0
|
||||
55
tests/fixtures/campaigns/vpn_hopping.yaml
vendored
Normal file
55
tests/fixtures/campaigns/vpn_hopping.yaml
vendored
Normal file
@@ -0,0 +1,55 @@
|
||||
# Fixture 2 (vpn_hopping) — see development/CAMPAIGN_CLUSTERING.md §2
|
||||
# and development/IDENTITY_RESOLUTION.md.
|
||||
#
|
||||
# One campaign, one actor, rotating across 5 distinct ASNs. JA3, HASSH,
|
||||
# and payload_hash are STABLE across every rotation — these are the
|
||||
# signals "the attacker can't cheaply rotate" (per the identity
|
||||
# resolution design doc) and they're the reason a clusterer should
|
||||
# recover all 5 observation rows as ONE identity, ONE campaign.
|
||||
#
|
||||
# Ground truth (verified at every level):
|
||||
# - 5 observations → 1 identity → 1 campaign (per truth_labels())
|
||||
#
|
||||
# Pass condition: a fingerprint-driven clusterer must fold all 5 rows
|
||||
# into one cluster at both campaign-level and identity-level scoring.
|
||||
#
|
||||
# Adversarial condition: an asn_clusterer (group attackers by ASN —
|
||||
# the textbook bad heuristic) must fragment the campaign into 5
|
||||
# pieces and breach the completeness floor. This is what proves "ASN
|
||||
# match" is correctly weighted "very low" in the planned similarity
|
||||
# graph (per TODO clusterer feature list).
|
||||
#
|
||||
# ASN choice: synthetic private-use values (RFC 6996 64512–65534) so
|
||||
# the fixture never collides with real-world data and signals "not
|
||||
# real" to readers at a glance.
|
||||
campaign:
|
||||
id: vpn-hopping-001
|
||||
actors:
|
||||
- id: hopper-a
|
||||
asn: 64512 # primary; rotation_asns overrides per row
|
||||
ip_pool: rotating
|
||||
rotation_count: 5
|
||||
rotation_asns: [64512, 64513, 64514, 64515, 64516]
|
||||
ja3: "771,4865-4866-4867-49195-49199-49196-49200,0-23-65281-10-11-35-16-5-13-18-51-45-43-27,29-23-24,0"
|
||||
hassh: "vpn-hopper-cccccccc-cccccccc-cccccccc"
|
||||
hours_active_utc: [12, 13, 14, 15, 16]
|
||||
jitter_seconds: 60
|
||||
phases:
|
||||
- name: delivery
|
||||
actor: hopper-a
|
||||
target_selector: { service: ssh, count: 5 }
|
||||
dwell_seconds: 1
|
||||
- name: exploitation
|
||||
actor: hopper-a
|
||||
tool_signature:
|
||||
# Stable payload across every rotation — same dropper from
|
||||
# whatever staging the operator uses, regardless of which VPN
|
||||
# exit they emerge from.
|
||||
payload_hash: "vpn-hopper-stage1-payload"
|
||||
target_selector: { service: ssh, count: 5 }
|
||||
dwell_seconds: 5
|
||||
- name: discovery
|
||||
actor: hopper-a
|
||||
target_selector: { service: ssh, count: 5 }
|
||||
dwell_seconds: 5
|
||||
duration_days: 2
|
||||
Reference in New Issue
Block a user