Honeypot SSH containers run `PROMPT_COMMAND` that calls
`logger --rfc5424 --msgid command -t bash "CMD …"`. The Docker-stdout
reader prepends an outer RFC5424 envelope (HOSTNAME=<decky>,
APP-NAME=1, MSGID=NIL) around that inner syslog line. Both the
collector parser (`parse_rfc5424`) and the correlation parser
(`parse_line`) saw the outer NIL MSGID and emitted `event_type="-"`
for every shell command — which:
- kept `Attacker.commands` rows missing `command_text`
- left R0001–R0030 (the pattern rule pack that matches shell
  commands) with no haystack
- made `decnet.collector.log` show `event written … type=-`
  for the very lines that should be `type=command`
Both parsers now detect the inner-RFC5424 shape (`<TS> <HOST> <APP>
<PROCID> <MSGID> <rest>`) when the outer MSGID is NIL and the SD-arm
is also NIL, and re-extract HOSTNAME / APP-NAME / MSGID / remainder
from the body. The collector parser also recovers the post-SD msg
tail when the SD block isn't `relay@55555` (the bash CMD line carries
a `[timeQuality …]` block) so the kv-fallback can find `src_ip`.
Mirroring tests in tests/collector and tests/correlation pin both
the unwrap and the regression guard for non-double-wrapped lines.
73 lines
2.4 KiB
Python
73 lines
2.4 KiB
Python
"""Correlation parser unwraps double-wrapped RFC5424 lines.
|
||
|
||
Mirrors ``tests/collector/test_parse_rfc5424_double_wrap.py``. Both
|
||
parsers read the same on-wire format; the profiler's ``parse_line``
|
||
must agree with the collector's ``parse_rfc5424`` so that
|
||
``Attacker.commands`` rows carry the real ``command`` event_type
|
||
(not the outer Docker envelope's NIL MSGID).
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
from decnet.correlation.parser import parse_line
|
||
|
||
|
||
_DOUBLE_WRAPPED_CMD = (
|
||
"<14>1 2026-05-02T06:22:48.089309+00:00 omega-decky 1 - - - "
|
||
" 2026-05-02T06:22:48.089286+00:00 SRV-DELTA-77 bash - command "
|
||
"[timeQuality tzKnown=\"1\" isSynced=\"1\" syncAccuracy=\"326228\"] "
|
||
"CMD uid=0 user=root src=192.168.1.5 pwd=/root cmd=ls /var/www/html"
|
||
)
|
||
|
||
|
||
def test_double_wrapped_bash_cmd_event_type_is_command() -> None:
    """The inner MSGID (``command``) must become the event type.

    The outer envelope's MSGID is NIL, so the event type has to come from
    the inner header of the double-wrapped sample.
    """
    event = parse_line(_DOUBLE_WRAPPED_CMD)

    # The sample must parse at all before its fields are checked.
    assert event is not None
    assert event.event_type == "command"
|
||
|
||
|
||
def test_double_wrapped_bash_cmd_uses_inner_decky_and_service() -> None:
    """Decky and service must come from the inner header.

    The outer envelope says ``omega-decky`` / ``1``; the parsed event is
    expected to report the inner ``SRV-DELTA-77`` / ``bash`` instead.
    """
    event = parse_line(_DOUBLE_WRAPPED_CMD)

    assert event is not None
    # Inner HOSTNAME, not the outer ``omega-decky``.
    assert event.decky == "SRV-DELTA-77"
    # Inner APP-NAME, not the outer ``1``.
    assert event.service == "bash"
|
||
|
||
|
||
def test_double_wrapped_bash_cmd_extracts_attacker_ip() -> None:
    """The ``src=`` value in the inner ``CMD`` body is the attacker IP."""
    event = parse_line(_DOUBLE_WRAPPED_CMD)

    assert event is not None
    # The sample has no ``relay@55555`` SD block; its only address is the
    # ``src=192.168.1.5`` pair in the message body.
    assert event.attacker_ip == "192.168.1.5"
|
||
|
||
|
||
def test_double_wrapped_bash_cmd_extracts_command_field() -> None:
    """``fields['command']`` must hold the shell command text.

    The behavioral profiler reads that field for its shell rules and for
    the per-attacker ``commands`` rollup; if it is absent, the R0001–R0030
    pattern rules have nothing to match against.
    """
    event = parse_line(_DOUBLE_WRAPPED_CMD)

    assert event is not None
    # The whole tail after ``cmd=`` is expected, embedded space included.
    assert event.fields.get("command") == "ls /var/www/html"
|
||
|
||
|
||
def test_single_wrapped_line_unchanged() -> None:
    """Regression guard: an ordinary single-envelope line parses as-is.

    The line carries a real MSGID (``disconnect``) and a ``relay@55555``
    SD block, so every field must come straight from that one header.
    """
    wire_line = (
        "<134>1 2026-05-02T06:00:25.453826+00:00 omega-decky smtp - "
        'disconnect [relay@55555 src_ip="192.168.1.5"]'
    )

    event = parse_line(wire_line)

    assert event is not None
    assert event.event_type == "disconnect"
    assert event.decky == "omega-decky"
    assert event.service == "smtp"
    assert event.attacker_ip == "192.168.1.5"
|
||
|
||
|
||
def test_outer_msgid_set_does_not_recurse() -> None:
    """A line whose outer MSGID is set must be reported from its own header.

    The header already names a real MSGID (``auth_attempt``), so the event
    type, decky and service are expected to be the header's own values.
    """
    wire_line = (
        "<134>1 2026-05-02T06:22:48.089309+00:00 omega-decky auth-helper - "
        'auth_attempt [relay@55555 username="root" src_ip="192.168.1.5"]'
    )

    event = parse_line(wire_line)

    assert event is not None
    assert event.event_type == "auth_attempt"
    assert event.decky == "omega-decky"
    assert event.service == "auth-helper"
|