feat(profiler): persist raw SSH KEX algorithm ordering

Prober already emits kex_algorithms in hassh_fingerprint syslog events, but
the raw ordered list was only queryable via the generic bounty store. Add a
dedicated AttackerBehavior.kex_order_raw column (TEXT, JSON list) so
post-v1 KEX-order fingerprinting has a typed, indexable home.

Pipeline:
  - sniffer_rollup() now consumes hassh_fingerprint events and collects
    distinct kex_algorithms strings across ports.
  - build_behavior_record() JSON-encodes the list (NULL when empty).
  - sqlmodel_repo._deserialize_behavior() parses it back into a list.

Closes pre-v1 gap #1 from SIGNAL_CAPTURE_AUDIT.md.
This commit is contained in:
2026-04-22 21:29:46 -04:00
parent 25838eb9f3
commit 8181f39ae2
5 changed files with 71 additions and 0 deletions

View File

@@ -89,10 +89,12 @@ def build_behavior_record(events: list[LogEvent]) -> dict[str, Any]:
if all_tools:
_span.set_attribute("tools", ",".join(all_tools))
kex_list = rollup.get("kex_order_raw") or []
return {
"os_guess": rollup["os_guess"],
"hop_distance": rollup["hop_distance"],
"tcp_fingerprint": json.dumps(rollup["tcp_fingerprint"]),
"kex_order_raw": json.dumps(kex_list) if kex_list else None,
"retransmit_count": rollup["retransmit_count"],
"behavior_class": behavior,
"beacon_interval_s": beacon_interval_s,

View File

@@ -19,6 +19,8 @@ _SNIFFER_SYN_EVENT: str = "tcp_syn_fingerprint"
_SNIFFER_FLOW_EVENT: str = "tcp_flow_timing"
# Prober-emitted active-probe result (SYN-ACK fingerprint of attacker machine).
_PROBER_TCPFP_EVENT: str = "tcpfp_fingerprint"
# Prober-emitted HASSHServer fingerprint; carries the raw kex_algorithms string.
_PROBER_HASSH_EVENT: str = "hassh_fingerprint"
# Canonical initial TTL for each coarse OS bucket. Used to derive hop
# distance when only the observed TTL is available (prober path).
@@ -71,6 +73,8 @@ def sniffer_rollup(events: list[LogEvent]) -> dict[str, Any]:
hops: list[int] = []
tcp_fp: dict[str, Any] | None = None
retransmits = 0
kex_order_raw: list[str] = []
_kex_seen: set[str] = set()
for e in events:
if e.event_type == _SNIFFER_SYN_EVENT:
@@ -109,6 +113,15 @@ def sniffer_rollup(events: list[LogEvent]) -> dict[str, Any]:
except (TypeError, ValueError):
pass
elif e.event_type == _PROBER_HASSH_EVENT:
# Prober HASSHServer probe: preserve the raw kex_algorithms string for
# post-hoc ordering analysis. Dedup because a single attacker SSH service
# will emit the same string on every port/probe cycle.
kex = e.fields.get("kex_algorithms")
if kex and kex not in _kex_seen:
kex_order_raw.append(kex)
_kex_seen.add(kex)
elif e.event_type == _PROBER_TCPFP_EVENT:
# Active-probe result: prober sent SYN to attacker, got SYN-ACK back.
# Field names differ from the passive sniffer (different emitter).
@@ -159,4 +172,5 @@ def sniffer_rollup(events: list[LogEvent]) -> dict[str, Any]:
"hop_distance": hop_distance,
"tcp_fingerprint": tcp_fp or {},
"retransmit_count": retransmits,
"kex_order_raw": kex_order_raw,
}

View File

@@ -175,6 +175,13 @@ class AttackerBehavior(SQLModel, table=True):
default="{}",
sa_column=Column("tcp_fingerprint", Text, nullable=False, default="{}"),
) # JSON: window, wscale, mss, options_sig
# Raw SSH KEX algorithm preference strings observed across HASSH probes
# (one entry per distinct kex_algorithms string; sniffer_rollup() drops
# repeats). Keeping the raw ordered list enables post-hoc KEX-order
# fingerprinting beyond the HASSH hash.
kex_order_raw: Optional[str] = Field(
default=None,
sa_column=Column("kex_order_raw", Text, nullable=True),
) # JSON list[str] — kex_algorithms comma-separated strings
retransmit_count: int = Field(default=0)
# Behavioral (derived by the profiler from log-event timing)
behavior_class: Optional[str] = None # beaconing | interactive | scanning | brute_force | slow_scan | mixed | unknown

View File

@@ -673,6 +673,16 @@ class SQLModelRepository(BaseRepository):
d["tool_guesses"] = []
elif raw is None:
d["tool_guesses"] = []
# Same list-or-None pattern for kex_order_raw.
raw_kex = d.get("kex_order_raw")
if isinstance(raw_kex, str):
try:
parsed_kex = json.loads(raw_kex)
d["kex_order_raw"] = parsed_kex if isinstance(parsed_kex, list) else [parsed_kex]
except (json.JSONDecodeError, TypeError):
d["kex_order_raw"] = []
elif raw_kex is None:
d["kex_order_raw"] = []
return d
@staticmethod