Files
DECNET/tests/correlation/test_parser_double_wrap.py
anti d9d2a80573 fix(collector): unwrap double-wrapped RFC5424 around bash PROMPT_COMMAND
Honeypot SSH containers run `PROMPT_COMMAND` that calls
`logger --rfc5424 --msgid command -t bash "CMD …"`. The Docker-stdout
reader prepends an outer RFC5424 envelope (HOSTNAME=<decky>,
APP-NAME=1, MSGID=NIL) around that inner syslog line. Both the
collector parser (`parse_rfc5424`) and the correlation parser
(`parse_line`) saw the outer NIL MSGID and emitted `event_type="-"`
for every shell command — which:
  - kept `Attacker.commands` rows missing `command_text`
  - left R0001–R0030 (the pattern rule pack that matches shell
    commands) with no haystack
  - made `decnet.collector.log` show `event written … type=-`
    for the very lines that should be `type=command`

Both parsers now detect the inner-RFC5424 shape (`<TS> <HOST> <APP>
<PROCID> <MSGID> <rest>`) when the outer MSGID is NIL and the SD-arm
is also NIL, and re-extract HOSTNAME / APP-NAME / MSGID / remainder
from the body. The collector parser also recovers the post-SD msg
tail when the SD block isn't `relay@55555` (the bash CMD line carries
a `[timeQuality …]` block) so the kv-fallback can find `src_ip`.

Mirroring tests in tests/collector and tests/correlation pin both
the unwrap and the regression guard for non-double-wrapped lines.
2026-05-02 02:32:21 -04:00

73 lines
2.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Correlation parser unwraps double-wrapped RFC5424 lines.
Mirrors ``tests/collector/test_parse_rfc5424_double_wrap.py``. Both
parsers read the same on-wire format; the profiler's ``parse_line``
must agree with the collector's ``parse_rfc5424`` so that
``Attacker.commands`` rows carry the real ``command`` event_type
(not the outer Docker envelope's NIL MSGID).
"""
from __future__ import annotations
from decnet.correlation.parser import parse_line
# Fixture: one double-wrapped syslog line.
#   * Outer RFC5424 envelope: HOSTNAME=omega-decky, APP-NAME=1, and NIL ("-")
#     PROCID, MSGID and structured data.
#   * Body: an inner syslog header (SRV-DELTA-77 / bash / MSGID=command),
#     a ``[timeQuality ...]`` structured-data block, then the bash ``CMD``
#     key=value text carrying ``src=`` and ``cmd=``.
# NOTE(review): the second fragment starts with a space, so the joined line has
# two spaces after the outer NIL SD — presumably mirrors the on-wire format;
# confirm before normalizing.
_DOUBLE_WRAPPED_CMD = (
"<14>1 2026-05-02T06:22:48.089309+00:00 omega-decky 1 - - - "
" 2026-05-02T06:22:48.089286+00:00 SRV-DELTA-77 bash - command "
"[timeQuality tzKnown=\"1\" isSynced=\"1\" syncAccuracy=\"326228\"] "
"CMD uid=0 user=root src=192.168.1.5 pwd=/root cmd=ls /var/www/html"
)
def test_double_wrapped_bash_cmd_event_type_is_command() -> None:
    """The inner MSGID (``command``) is reported, not the outer NIL MSGID."""
    parsed = parse_line(_DOUBLE_WRAPPED_CMD)
    assert parsed is not None
    assert parsed.event_type == "command"
def test_double_wrapped_bash_cmd_uses_inner_decky_and_service() -> None:
    """Decky and service are taken from the inner header of the fixture."""
    parsed = parse_line(_DOUBLE_WRAPPED_CMD)
    assert parsed is not None
    # Checked in the same order as before: decky first, then service.
    expected = {"decky": "SRV-DELTA-77", "service": "bash"}
    for attr, want in expected.items():
        assert getattr(parsed, attr) == want
def test_double_wrapped_bash_cmd_extracts_attacker_ip() -> None:
    """The ``src=`` value in the CMD text surfaces as ``attacker_ip``."""
    parsed = parse_line(_DOUBLE_WRAPPED_CMD)
    assert parsed is not None
    attacker = parsed.attacker_ip
    assert attacker == "192.168.1.5"
def test_double_wrapped_bash_cmd_extracts_command_field() -> None:
    """The ``cmd=`` value is exposed as ``fields['command']``.

    The behavioral profiler reads ``fields['command']`` for shell rules
    and for the per-attacker ``commands`` rollup. Without it the
    R0001-R0030 pattern rules have no haystack.
    """
    parsed = parse_line(_DOUBLE_WRAPPED_CMD)
    assert parsed is not None
    command_text = parsed.fields.get("command")
    assert command_text == "ls /var/www/html"
def test_single_wrapped_line_unchanged() -> None:
    """Regression guard: a plain single-wrapped line keeps its own header fields."""
    header = "<134>1 2026-05-02T06:00:25.453826+00:00 omega-decky smtp - "
    tail = 'disconnect [relay@55555 src_ip="192.168.1.5"]'
    parsed = parse_line(header + tail)
    assert parsed is not None
    # Same checks, same order as the original sequence of asserts.
    expected = {
        "event_type": "disconnect",
        "decky": "omega-decky",
        "service": "smtp",
        "attacker_ip": "192.168.1.5",
    }
    for attr, want in expected.items():
        assert getattr(parsed, attr) == want
def test_outer_msgid_set_does_not_recurse() -> None:
    """When the outer MSGID is set, the outer header values are reported as-is."""
    header = "<134>1 2026-05-02T06:22:48.089309+00:00 omega-decky auth-helper - "
    tail = 'auth_attempt [relay@55555 username="root" src_ip="192.168.1.5"]'
    parsed = parse_line(header + tail)
    assert parsed is not None
    # Same checks, same order as the original sequence of asserts.
    expected = {
        "event_type": "auth_attempt",
        "decky": "omega-decky",
        "service": "auth-helper",
    }
    for attr, want in expected.items():
        assert getattr(parsed, attr) == want