feat(sniffer): capture SSH client banner from TCP stream

Parse RFC 4253 §4.2 identification strings from the first attacker→decky
data segment on TCP/22; emit them as ssh_client_banner syslog events and
fan them out on the bus. Profiler's sniffer_rollup dedupes the observed
banners into a new AttackerBehavior.ssh_client_banners JSON column.

Closes gap #3 from SIGNAL_CAPTURE_AUDIT.md.
This commit is contained in:
2026-04-22 21:37:01 -04:00
parent 8181f39ae2
commit d3321324eb
7 changed files with 148 additions and 0 deletions

View File

@@ -90,11 +90,13 @@ def build_behavior_record(events: list[LogEvent]) -> dict[str, Any]:
_span.set_attribute("tools", ",".join(all_tools))
kex_list = rollup.get("kex_order_raw") or []
ssh_banners = rollup.get("ssh_client_banners") or []
return {
"os_guess": rollup["os_guess"],
"hop_distance": rollup["hop_distance"],
"tcp_fingerprint": json.dumps(rollup["tcp_fingerprint"]),
"kex_order_raw": json.dumps(kex_list) if kex_list else None,
"ssh_client_banners": json.dumps(ssh_banners) if ssh_banners else None,
"retransmit_count": rollup["retransmit_count"],
"behavior_class": behavior,
"beacon_interval_s": beacon_interval_s,

View File

@@ -21,6 +21,8 @@ _SNIFFER_FLOW_EVENT: str = "tcp_flow_timing"
_PROBER_TCPFP_EVENT: str = "tcpfp_fingerprint"
# Prober-emitted HASSHServer fingerprint; carries the raw kex_algorithms string.
_PROBER_HASSH_EVENT: str = "hassh_fingerprint"
# Sniffer-emitted SSH client identification string (RFC 4253 §4.2).
_SNIFFER_SSH_BANNER_EVENT: str = "ssh_client_banner"
# Canonical initial TTL for each coarse OS bucket. Used to derive hop
# distance when only the observed TTL is available (prober path).
@@ -75,6 +77,8 @@ def sniffer_rollup(events: list[LogEvent]) -> dict[str, Any]:
retransmits = 0
kex_order_raw: list[str] = []
_kex_seen: set[str] = set()
ssh_client_banners: list[str] = []
_ssh_banner_seen: set[str] = set()
for e in events:
if e.event_type == _SNIFFER_SYN_EVENT:
@@ -122,6 +126,15 @@ def sniffer_rollup(events: list[LogEvent]) -> dict[str, Any]:
kex_order_raw.append(kex)
_kex_seen.add(kex)
elif e.event_type == _SNIFFER_SSH_BANNER_EVENT:
# Sniffer-observed SSH identification string from attacker.
# Dedup: the same attacker will reuse the same client banner
# across flows/reconnects; record distinct values in order seen.
banner = e.fields.get("ssh_version")
if banner and banner not in _ssh_banner_seen:
ssh_client_banners.append(banner)
_ssh_banner_seen.add(banner)
elif e.event_type == _PROBER_TCPFP_EVENT:
# Active-probe result: prober sent SYN to attacker, got SYN-ACK back.
# Field names differ from the passive sniffer (different emitter).
@@ -173,4 +186,5 @@ def sniffer_rollup(events: list[LogEvent]) -> dict[str, Any]:
"tcp_fingerprint": tcp_fp or {},
"retransmit_count": retransmits,
"kex_order_raw": kex_order_raw,
"ssh_client_banners": ssh_client_banners,
}

View File

@@ -61,9 +61,35 @@ _BUS_TRAFFIC_EVENTS: frozenset[str] = frozenset({
"tls_session",
"tcp_flow_timing",
"tcp_syn_fingerprint",
"ssh_client_banner",
})
def _parse_ssh_banner(data: bytes) -> str | None:
"""
Return the attacker's SSH identification string (RFC 4253 §4.2) if
*data* begins with one, else None.
A valid banner starts with ``SSH-`` and terminates at the first CR or LF
within the 255-byte RFC-mandated window. The returned string is decoded
as ASCII and stripped of the trailing CR/LF bytes.
"""
if not data.startswith(b"SSH-"):
return None
end = -1
# RFC 4253: identification string (incl. CR LF) must not exceed 255 bytes.
for i, b in enumerate(data[:255]):
if b in (0x0D, 0x0A): # CR or LF
end = i
break
if end < 5: # "SSH-X" minimum
return None
try:
return data[:end].decode("ascii", errors="strict")
except UnicodeDecodeError:
return None
# ─── TCP option extraction for passive fingerprinting ───────────────────────
def _extract_tcp_fingerprint(tcp_options: list) -> dict[str, Any]:
@@ -1053,6 +1079,29 @@ class SnifferEngine:
if not payload:
return
# SSH client banner (RFC 4253 §4.2): attacker→decky TCP/22, first
# application-data segment of the flow. Emit once per flow.
if (
dst_port == 22
and dst_ip in self._ip_to_decky
and direction_forward
):
flow = self._flows.get(flow_key)
if flow is not None and not flow.get("ssh_banner_seen"):
banner = _parse_ssh_banner(payload)
if banner is not None:
flow["ssh_banner_seen"] = True
target_node = self._ip_to_decky[dst_ip]
self._log(
target_node,
"ssh_client_banner",
src_ip=src_ip,
src_port=str(src_port),
dst_ip=dst_ip,
dst_port=str(dst_port),
ssh_version=banner,
)
if payload[0] != _TLS_RECORD_HANDSHAKE:
return

View File

@@ -182,6 +182,13 @@ class AttackerBehavior(SQLModel, table=True):
default=None,
sa_column=Column("kex_order_raw", Text, nullable=True),
) # JSON list[str] — kex_algorithms comma-separated strings
# Sniffer-observed SSH client identification strings (RFC 4253 §4.2),
# deduped in observation order. Captures the attacker's SSH client
# software (e.g. "SSH-2.0-OpenSSH_9.2p1", "SSH-2.0-libssh2_1.10.0").
ssh_client_banners: Optional[str] = Field(
default=None,
sa_column=Column("ssh_client_banners", Text, nullable=True),
) # JSON list[str]
retransmit_count: int = Field(default=0)
# Behavioral (derived by the profiler from log-event timing)
behavior_class: Optional[str] = None # beaconing | interactive | scanning | brute_force | slow_scan | mixed | unknown

View File

@@ -683,6 +683,16 @@ class SQLModelRepository(BaseRepository):
d["kex_order_raw"] = []
elif raw_kex is None:
d["kex_order_raw"] = []
# Same list-or-None pattern for ssh_client_banners.
raw_banners = d.get("ssh_client_banners")
if isinstance(raw_banners, str):
try:
parsed_banners = json.loads(raw_banners)
d["ssh_client_banners"] = parsed_banners if isinstance(parsed_banners, list) else [parsed_banners]
except (json.JSONDecodeError, TypeError):
d["ssh_client_banners"] = []
elif raw_banners is None:
d["ssh_client_banners"] = []
return d
@staticmethod