feat(clustering): combined edge weight + medium-tier wiring
The clusterer now replaces its single high-tier function call with a tier-weighted sum. Tier multipliers (high=1.0, medium=0.6, low=0.2, very_low=0.05) are tuned so the threshold (1.0) admits high-tier agreement alone while leaving every weaker tier — and every combination of weaker tiers — under threshold.

Per-tier discipline tested:
- high alone clusters
- medium alone does NOT cluster (supporting signal only)
- low alone does NOT cluster (fixture 1's failure mode)
- very-low alone does NOT cluster (fixture 2's failure mode)
- all three weak tiers stacked still don't reach threshold
- high + medium clusters (high already saturates)

The combination is forward-compatible: low + very-low contributions are computed today but always project to 0.0 because the production adapter doesn't yet populate credentials / ASN-edge inputs into the fixture path. Their contribution becomes load-bearing in commit 7, when the low-tier landing tightens the F1 / F2 bounds.

Fixture 4 (paused_campaign) ratchet added: the high-tier signal carries the multi-day-silence campaign into one identity. This is a time-agnostic invariant — silence is irrelevant to the edge weight.
This commit is contained in:
@@ -10,7 +10,9 @@ from __future__ import annotations
|
||||
import pytest
|
||||
|
||||
from decnet.clustering.impl.similarity import (
|
||||
EDGE_THRESHOLD,
|
||||
Observation,
|
||||
combined_edge_weight,
|
||||
from_synthetic,
|
||||
high_weight_edge,
|
||||
low_weight_edge,
|
||||
@@ -179,6 +181,70 @@ def test_observations_carry_no_timestamps():
|
||||
# ─── from_synthetic adapter ────────────────────────────────────────────────
|
||||
|
||||
|
||||
# ─── combined_edge_weight tier discipline ─────────────────────────────────
|
||||
|
||||
|
||||
def test_combined_high_alone_crosses_threshold():
    """Two observations whose only supplied signal is the same ja3
    value must reach the clustering threshold."""
    fingerprint = "ja3-shared"
    left = _obs(ja3=fingerprint)
    right = _obs(ja3=fingerprint)
    weight = combined_edge_weight(left, right)
    assert weight >= EDGE_THRESHOLD
|
||||
|
||||
|
||||
def test_combined_medium_alone_below_threshold():
    """A lone medium-tier match contributes weight but must stay under
    the threshold — medium supports clustering, it never drives it."""
    discovery_commands = ("ls", "id", "uname")
    left = _obs(commands_by_phase={"discovery": discovery_commands})
    right = _obs(commands_by_phase={"discovery": discovery_commands})
    weight = combined_edge_weight(left, right)
    # Strictly positive: the match must register as a signal at all.
    assert weight > 0
    # Strictly below threshold: it must not be enough to cluster.
    assert weight < EDGE_THRESHOLD
|
||||
|
||||
|
||||
def test_combined_low_alone_below_threshold():
    """Identical credential sets alone must NOT cluster — this is
    fixture 1's failure mode."""
    shared_credentials = frozenset({("root", "toor"), ("admin", "admin")})
    left = _obs(credentials=shared_credentials)
    right = _obs(credentials=shared_credentials)
    weight = combined_edge_weight(left, right)
    # Strictly positive: the overlap must register as a signal at all.
    assert weight > 0
    # Strictly below threshold: it must not be enough to cluster.
    assert weight < EDGE_THRESHOLD
|
||||
|
||||
|
||||
def test_combined_very_low_alone_below_threshold():
    """A matching ASN alone must NOT cluster — this is fixture 2's
    failure mode."""
    shared_asn = 64500
    left = _obs(asn=shared_asn)
    right = _obs(asn=shared_asn)
    weight = combined_edge_weight(left, right)
    # Strictly positive: the overlap must register as a signal at all.
    assert weight > 0
    # Strictly below threshold: it must not be enough to cluster.
    assert weight < EDGE_THRESHOLD
|
||||
|
||||
|
||||
def test_combined_all_weak_tiers_still_below_threshold():
    """Even all three weaker tiers stacked don't reach threshold —
    only a high-tier signal does.

    The weight is also required to be strictly positive, matching the
    single-tier tests, so the upper bound cannot pass vacuously when
    none of the weak signals are scored."""
    a = _obs(
        asn=64500,
        credentials=frozenset({("root", "toor")}),
        commands_by_phase={"discovery": ("ls",)},
    )
    b = _obs(
        asn=64500,
        credentials=frozenset({("root", "toor")}),
        commands_by_phase={"discovery": ("ls",)},
    )
    # 0.6*1.0 (medium) + 0.2*1.0 (low) + 0.05*1.0 (very_low) = 0.85
    weight = combined_edge_weight(a, b)
    assert 0 < weight < EDGE_THRESHOLD
|
||||
|
||||
|
||||
def test_combined_high_plus_medium_clusters():
    """A matching ja3 value together with matching discovery commands
    must reach the clustering threshold."""
    fingerprint = "ja3-x"
    discovery_commands = ("ls",)
    left = _obs(ja3=fingerprint, commands_by_phase={"discovery": discovery_commands})
    right = _obs(ja3=fingerprint, commands_by_phase={"discovery": discovery_commands})
    weight = combined_edge_weight(left, right)
    assert weight >= EDGE_THRESHOLD
|
||||
|
||||
|
||||
def test_combined_no_signal_returns_zero():
    """Two observations built with no arguments must score exactly 0.0."""
    left = _obs()
    right = _obs()
    weight = combined_edge_weight(left, right)
    assert weight == 0.0
|
||||
|
||||
|
||||
def test_from_synthetic_round_trip():
|
||||
"""The adapter projects a SyntheticAttacker into an Observation
|
||||
that the edge functions can score over."""
|
||||
|
||||
Reference in New Issue
Block a user