diff --git a/decnet/collector/worker.py b/decnet/collector/worker.py index d744cec1..ccbffdea 100644 --- a/decnet/collector/worker.py +++ b/decnet/collector/worker.py @@ -145,6 +145,27 @@ _RFC5424_RE = re.compile( r"(\S+) " # 4: MSGID (event_type) r"(.+)$", # 5: SD element + optional MSG ) + +# Honeypot SSH containers export a ``PROMPT_COMMAND`` that calls +# ``logger --rfc5424 --msgid command -p user.info -t bash "CMD …"``. +# That inner RFC 5424 line lands on the container's stdout, where the +# Docker stream reader prepends ANOTHER RFC 5424 envelope (PRI=14, +# HOSTNAME=, APP-NAME=1, MSGID=NIL). The outer parse therefore +# sees ``event_type == "-"`` while the real MSGID (``command``) is +# inside the body. We detect that case and re-extract the inner +# ``HOSTNAME APP-NAME PROCID MSGID rest`` so downstream consumers see +# ``event_type == "command"`` plus the real source hostname. +# +# Anchored on an ISO-8601 timestamp at the head of the body so we +# don't false-match free-form prose like "Connection from 1.2.3.4". +_INNER_RFC5424_RE = re.compile( + r"^(\d{4}-\d{2}-\d{2}T\S+)\s+" # 1: inner TIMESTAMP + r"(\S+)\s+" # 2: inner HOSTNAME + r"(\S+)\s+" # 3: inner APP-NAME + r"\S+\s+" # PROCID (NIL or PID) + r"(\S+)\s+" # 4: inner MSGID + r"(.+)$", # 5: inner SD/MSG remainder +) _SD_BLOCK_RE = re.compile(r'\[relay@55555\s+(.*?)\]', re.DOTALL) _PARAM_RE = re.compile(r'(\w+)="((?:[^"\\]|\\.)*)"') _IP_FIELDS = ("src_ip", "src", "client_ip", "remote_ip", "remote_addr", "target_ip", "ip") @@ -184,8 +205,23 @@ def parse_rfc5424(line: str) -> Optional[dict[str, Any]]: ts_raw, decky, service, event_type, sd_rest = m.groups() fields: dict[str, str] = {} - msg: str = "" + # Honeypot SSH PROMPT_COMMAND lines are double-wrapped (Docker + # stdout envelope around the inner ``logger --msgid command`` line). + # Outer MSGID is NIL; the real MSGID is inside the body. Detect + # the inner shape and re-extract HOSTNAME / APP-NAME / MSGID / + # remainder so downstream extraction sees the real header. + if event_type == "-" and sd_rest.startswith("-"): + body = sd_rest[1:].lstrip() + inner = _INNER_RFC5424_RE.match(body) + if inner is not None: + _i_ts, i_host, i_app, i_msgid, i_rest = inner.groups() + decky = i_host + service = i_app + event_type = i_msgid + sd_rest = i_rest + + msg: str = "" if sd_rest.startswith("-"): msg = sd_rest[1:].lstrip() elif sd_rest.startswith("["): @@ -193,9 +229,14 @@ def parse_rfc5424(line: str) -> Optional[dict[str, Any]]: if block: for k, v in _PARAM_RE.findall(block.group(1)): fields[k] = v.replace('\\"', '"').replace("\\\\", "\\").replace("\\]", "]") - msg_match = re.search(r'\]\s+(.+)$', sd_rest) - if msg_match: - msg = msg_match.group(1).strip() + # Always recover the post-SD message tail, even when the SD + # block isn't ``relay@55555`` (e.g. the ``timeQuality`` block + # syslog auto-emits on bash CMD lines). Without this the body + # of unwrapped PROMPT_COMMAND lines stays empty and the + # attacker_ip kv-fallback below has nothing to scan. + msg_match = re.search(r'\]\s+(.+)$', sd_rest) + if msg_match: + msg = msg_match.group(1).strip() else: msg = sd_rest diff --git a/decnet/correlation/parser.py b/decnet/correlation/parser.py index 65ace509..4a80d48c 100644 --- a/decnet/correlation/parser.py +++ b/decnet/correlation/parser.py @@ -32,6 +32,21 @@ _RFC5424_RE = re.compile( r"(.+)$", # 5: SD element + optional MSG ) +# Honeypot SSH PROMPT_COMMAND lines arrive double-wrapped: the +# Docker-stdout collector envelope wraps the inner ``logger +# --rfc5424 --msgid command -t bash …`` line. Outer MSGID is NIL, +# real MSGID lives in the body. Mirrors the unwrap logic in +# ``decnet.collector.worker._INNER_RFC5424_RE`` — the two parsers +# read the same on-wire format. +_INNER_RFC5424_RE = re.compile( + r"^(\d{4}-\d{2}-\d{2}T\S+)\s+" # 1: inner TIMESTAMP + r"(\S+)\s+" # 2: inner HOSTNAME + r"(\S+)\s+" # 3: inner APP-NAME + r"\S+\s+" # PROCID (NIL or PID) + r"(\S+)\s+" # 4: inner MSGID + r"(.+)$", # 5: inner SD/MSG remainder +) + # Structured data block: [relay@55555 k="v" ...] _SD_BLOCK_RE = re.compile(r'\[relay@55555\s+(.*?)\]', re.DOTALL) @@ -121,6 +136,21 @@ def parse_line(line: str) -> LogEvent | None: ts_raw, decky, service, event_type, sd_rest = m.groups() + # Unwrap double-wrapped Docker-stdout envelopes around bash + # PROMPT_COMMAND lines. See ``_INNER_RFC5424_RE`` and the matching + # logic in ``decnet.collector.worker.parse_rfc5424``. Must run + # before the decky/service NIL-guard below — the OUTER decky is + # the docker host, the inner header carries the real source. + if event_type == "-" and sd_rest.startswith("-"): + body = sd_rest[1:].lstrip() + inner = _INNER_RFC5424_RE.match(body) + if inner is not None: + _i_ts, i_host, i_app, i_msgid, i_rest = inner.groups() + decky = i_host + service = i_app + event_type = i_msgid + sd_rest = i_rest + if decky == "-" or service == "-": return None diff --git a/tests/collector/test_parse_rfc5424_double_wrap.py b/tests/collector/test_parse_rfc5424_double_wrap.py new file mode 100644 index 00000000..3d915e42 --- /dev/null +++ b/tests/collector/test_parse_rfc5424_double_wrap.py @@ -0,0 +1,87 @@ +"""Collector parser unwraps double-wrapped RFC5424 lines. + +Honeypot SSH containers export a ``PROMPT_COMMAND`` that calls +``logger --rfc5424 --msgid command -p user.info -t bash "CMD …"``. +The Docker-stdout reader prepends an outer RFC 5424 envelope around +that inner syslog line. Outer MSGID is NIL, so without an unwrap step +every shell command lands as ``event_type="-"`` in the collector +output and the TTP rule pack never sees it. +""" +from __future__ import annotations + +from decnet.collector.worker import parse_rfc5424 + + +_DOUBLE_WRAPPED_CMD = ( + "<14>1 2026-05-02T06:22:48.089309+00:00 omega-decky 1 - - - " + " 2026-05-02T06:22:48.089286+00:00 SRV-DELTA-77 bash - command " + "[timeQuality tzKnown=\"1\" isSynced=\"1\" syncAccuracy=\"326228\"] " + "CMD uid=0 user=root src=192.168.1.5 pwd=/root cmd=ls /var/www/html" +) + + +def test_double_wrapped_bash_cmd_extracts_inner_msgid() -> None: + p = parse_rfc5424(_DOUBLE_WRAPPED_CMD) + assert p is not None + assert p["event_type"] == "command" + # decky / service come from the INNER header — not the outer + # ``omega-decky`` / ``1`` Docker envelope fields. + assert p["decky"] == "SRV-DELTA-77" + assert p["service"] == "bash" + + +def test_double_wrapped_bash_cmd_recovers_attacker_ip_from_msg() -> None: + p = parse_rfc5424(_DOUBLE_WRAPPED_CMD) + assert p is not None + assert p["attacker_ip"] == "192.168.1.5" + + +def test_double_wrapped_bash_cmd_preserves_full_msg_body() -> None: + p = parse_rfc5424(_DOUBLE_WRAPPED_CMD) + assert p is not None + # The cmd= value lives at the tail of msg; downstream consumers + # (session aggregator, profiler) extract it from there. + assert "cmd=ls /var/www/html" in p["msg"] + + +def test_single_wrapped_relay_line_still_parses_unchanged() -> None: + """Regression guard: non-double-wrapped lines must keep their shape.""" + line = ( + "<134>1 2026-05-02T06:00:25.453826+00:00 omega-decky smtp - " + "disconnect [relay@55555 src_ip=\"192.168.1.5\"]" + ) + p = parse_rfc5424(line) + assert p is not None + assert p["event_type"] == "disconnect" + assert p["decky"] == "omega-decky" + assert p["service"] == "smtp" + assert p["attacker_ip"] == "192.168.1.5" + + +def test_outer_msgid_set_does_not_recurse() -> None: + """When outer MSGID is real, leave it alone — no inner-header lookup.""" + # Synthetic: outer MSGID=auth_attempt, body contains an + # accidental inner-shaped substring. We must NOT replace + # auth_attempt with anything from inside the body. + line = ( + "<134>1 2026-05-02T06:22:48.089309+00:00 omega-decky auth-helper - " + "auth_attempt [relay@55555 username=\"root\" src_ip=\"192.168.1.5\"]" + ) + p = parse_rfc5424(line) + assert p is not None + assert p["event_type"] == "auth_attempt" + assert p["decky"] == "omega-decky" + assert p["service"] == "auth-helper" + + +def test_outer_nil_msgid_with_non_inner_body_unchanged() -> None: + """NIL-MSGID lines whose body isn't a wrapped RFC5424 line stay NIL.""" + # The body is plain prose, not a ` ` head. + line = ( + "<14>1 2026-05-02T06:22:48.000000+00:00 host app - - - " + "Failed password for root from 192.168.1.5 port 42772 ssh2" + ) + p = parse_rfc5424(line) + assert p is not None + assert p["event_type"] == "-" + assert p["attacker_ip"] == "192.168.1.5" diff --git a/tests/correlation/test_parser_double_wrap.py b/tests/correlation/test_parser_double_wrap.py new file mode 100644 index 00000000..4a3dec52 --- /dev/null +++ b/tests/correlation/test_parser_double_wrap.py @@ -0,0 +1,72 @@ +"""Correlation parser unwraps double-wrapped RFC5424 lines. + +Mirrors ``tests/collector/test_parse_rfc5424_double_wrap.py``. Both +parsers read the same on-wire format; the profiler's ``parse_line`` +must agree with the collector's ``parse_rfc5424`` so that +``Attacker.commands`` rows carry the real ``command`` event_type +(not the outer Docker envelope's NIL MSGID). +""" +from __future__ import annotations + +from decnet.correlation.parser import parse_line + + +_DOUBLE_WRAPPED_CMD = ( + "<14>1 2026-05-02T06:22:48.089309+00:00 omega-decky 1 - - - " + " 2026-05-02T06:22:48.089286+00:00 SRV-DELTA-77 bash - command " + "[timeQuality tzKnown=\"1\" isSynced=\"1\" syncAccuracy=\"326228\"] " + "CMD uid=0 user=root src=192.168.1.5 pwd=/root cmd=ls /var/www/html" +) + + +def test_double_wrapped_bash_cmd_event_type_is_command() -> None: + e = parse_line(_DOUBLE_WRAPPED_CMD) + assert e is not None + assert e.event_type == "command" + + +def test_double_wrapped_bash_cmd_uses_inner_decky_and_service() -> None: + e = parse_line(_DOUBLE_WRAPPED_CMD) + assert e is not None + assert e.decky == "SRV-DELTA-77" + assert e.service == "bash" + + +def test_double_wrapped_bash_cmd_extracts_attacker_ip() -> None: + e = parse_line(_DOUBLE_WRAPPED_CMD) + assert e is not None + assert e.attacker_ip == "192.168.1.5" + + +def test_double_wrapped_bash_cmd_extracts_command_field() -> None: + """The behavioral profiler reads ``fields['command']`` for shell + rules and the per-attacker ``commands`` rollup. Without it the + R0001–R0030 pattern rules have no haystack.""" + e = parse_line(_DOUBLE_WRAPPED_CMD) + assert e is not None + assert e.fields.get("command") == "ls /var/www/html" + + +def test_single_wrapped_line_unchanged() -> None: + line = ( + "<134>1 2026-05-02T06:00:25.453826+00:00 omega-decky smtp - " + "disconnect [relay@55555 src_ip=\"192.168.1.5\"]" + ) + e = parse_line(line) + assert e is not None + assert e.event_type == "disconnect" + assert e.decky == "omega-decky" + assert e.service == "smtp" + assert e.attacker_ip == "192.168.1.5" + + +def test_outer_msgid_set_does_not_recurse() -> None: + line = ( + "<134>1 2026-05-02T06:22:48.089309+00:00 omega-decky auth-helper - " + "auth_attempt [relay@55555 username=\"root\" src_ip=\"192.168.1.5\"]" + ) + e = parse_line(line) + assert e is not None + assert e.event_type == "auth_attempt" + assert e.decky == "omega-decky" + assert e.service == "auth-helper"