feat(clustering): roll session digraph SimHashes into identity centroid
The identity clusterer now folds each identity's per-session motor.digraph_simhash observations into a single 8-byte bitwise-majority centroid (which denoises per-session jitter) and writes it to AttackerIdentity.kd_digraph_simhash via update_identity_fingerprints, so the previously orphaned column is now populated. list_identities_for_clustering projects the column so the campaign clusterer can read it. Also extends the abstract repository interface and the DummyRepo stub and coverage to match.
This commit is contained in:
@@ -683,15 +683,19 @@ class BaseRepository(ABC):
|
||||
ja3_hashes: Optional[str] = None,
|
||||
hassh_hashes: Optional[str] = None,
|
||||
tls_cert_sha256: Optional[str] = None,
|
||||
kd_digraph_simhash: Optional[bytes] = None,
|
||||
) -> None:
|
||||
"""Set the fingerprint summary columns on one ``AttackerIdentity``.
|
||||
|
||||
Each argument is a JSON-encoded ``list[str]`` (the federation
|
||||
wire shape) or ``None`` to leave the corresponding column at
|
||||
``NULL``. Always overwrites — the rollup writer is the source
|
||||
of truth for these columns, computed deterministically from
|
||||
the identity's member observations every clusterer tick. Also
|
||||
bumps ``updated_at`` so cache subscribers can invalidate.
|
||||
``ja3_hashes`` / ``hassh_hashes`` / ``tls_cert_sha256`` are
|
||||
JSON-encoded ``list[str]`` (the federation wire shape) or
|
||||
``None``. ``kd_digraph_simhash`` is the 8-byte keystroke-rhythm
|
||||
centroid (bitwise majority over the identity's session-level
|
||||
``motor.digraph_simhash`` observations) or ``None``. Always
|
||||
overwrites — the rollup writer is the source of truth for these
|
||||
columns, recomputed deterministically from the identity's member
|
||||
observations every clusterer tick. Also bumps ``updated_at`` so
|
||||
cache subscribers can invalidate.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
@@ -105,6 +105,7 @@ class CampaignsMixin(_MixinBase):
|
||||
AttackerIdentity.hassh_hashes,
|
||||
AttackerIdentity.payload_simhashes,
|
||||
AttackerIdentity.c2_endpoints,
|
||||
AttackerIdentity.kd_digraph_simhash,
|
||||
).order_by(AttackerIdentity.created_at)
|
||||
if limit is not None:
|
||||
statement = statement.limit(limit)
|
||||
@@ -129,6 +130,7 @@ class CampaignsMixin(_MixinBase):
|
||||
"hassh_hashes": row.hassh_hashes,
|
||||
"payload_simhashes": row.payload_simhashes,
|
||||
"c2_endpoints": row.c2_endpoints,
|
||||
"kd_digraph_simhash": row.kd_digraph_simhash,
|
||||
}
|
||||
for row in result.all()
|
||||
]
|
||||
|
||||
@@ -173,6 +173,7 @@ class IdentitiesMixin(_MixinBase):
|
||||
ja3_hashes: Optional[str] = None,
|
||||
hassh_hashes: Optional[str] = None,
|
||||
tls_cert_sha256: Optional[str] = None,
|
||||
kd_digraph_simhash: Optional[bytes] = None,
|
||||
) -> None:
|
||||
statement = (
|
||||
update(AttackerIdentity)
|
||||
@@ -181,6 +182,7 @@ class IdentitiesMixin(_MixinBase):
|
||||
ja3_hashes=ja3_hashes,
|
||||
hassh_hashes=hassh_hashes,
|
||||
tls_cert_sha256=tls_cert_sha256,
|
||||
kd_digraph_simhash=kd_digraph_simhash,
|
||||
updated_at=datetime.now(timezone.utc),
|
||||
)
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user