From 8bdc5b98c9b476fdc7aa886bedec0f1617f2f875 Mon Sep 17 00:00:00 2001 From: anti Date: Sat, 18 Apr 2026 05:37:08 -0400 Subject: [PATCH] feat(collector): parse real PROCID and extract IPs from logger kv pairs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Relaxed RFC 5424 regex to accept either NILVALUE or a numeric PROCID; sshd / sudo go through rsyslog with their real PID, while syslog_bridge emitters keep using '-'. - Added a fallback pass that scans the MSG body for IP-shaped key=value tokens. This rescues attacker attribution for plain logger callers like the SSH PROMPT_COMMAND shim, which emits 'CMD … src=IP …' without SD-element params. --- decnet/collector/worker.py | 24 +++++++++++++++++++++- tests/test_collector.py | 42 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/decnet/collector/worker.py b/decnet/collector/worker.py index bb87c74..3234afc 100644 --- a/decnet/collector/worker.py +++ b/decnet/collector/worker.py @@ -110,7 +110,9 @@ _RFC5424_RE = re.compile( r"(\S+) " # 1: TIMESTAMP r"(\S+) " # 2: HOSTNAME (decky name) r"(\S+) " # 3: APP-NAME (service) - r"- " # PROCID always NILVALUE + r"\S+ " # PROCID — NILVALUE ("-") for syslog_bridge emitters, + # real PID for native syslog callers like sshd/sudo + # routed through rsyslog. Accept both; we don't consume it. r"(\S+) " # 4: MSGID (event_type) r"(.+)$", # 5: SD element + optional MSG ) @@ -118,6 +120,13 @@ _SD_BLOCK_RE = re.compile(r'\[relay@55555\s+(.*?)\]', re.DOTALL) _PARAM_RE = re.compile(r'(\w+)="((?:[^"\\]|\\.)*)"') _IP_FIELDS = ("src_ip", "src", "client_ip", "remote_ip", "remote_addr", "target_ip", "ip") +# Free-form `key=value` pairs in the MSG body. Used for lines that bypass the +# syslog_bridge SD format — e.g. the SSH container's PROMPT_COMMAND which +# calls `logger -t bash "CMD uid=0 user=root src=1.2.3.4 pwd=/root cmd=…"`. 
+# Values run until the next whitespace, so a multi-word `cmd=…` value is cut at +# its first space; that is fine because we only read IP-named keys here anyway. +_MSG_KV_RE = re.compile(r'(\w+)=(\S+)') + def parse_rfc5424(line: str) -> Optional[dict[str, Any]]: """ @@ -151,6 +160,19 @@ def parse_rfc5424(line: str) -> Optional[dict[str, Any]]: attacker_ip = fields[fname] break + # Fallback for plain `logger` callers that don't use SD params (notably + # the SSH container's bash PROMPT_COMMAND: `logger -t bash "CMD … src=IP …"`). + # Scan the MSG body for `key=value` tokens whose key is listed in + # _IP_FIELDS ONLY (the value itself is not validated) — don't fold + # them into `fields`, because the frontend's parseEventBody already + # renders kv pairs from the msg and doubling them up produces noisy + # duplicate pills. This keeps attacker attribution working without + # changing the shape of `fields` for non-SD lines. + if attacker_ip == "Unknown" and msg: + for k, v in _MSG_KV_RE.findall(msg): + if k in _IP_FIELDS: + attacker_ip = v + break + try: ts_formatted = datetime.fromisoformat(ts_raw).strftime("%Y-%m-%d %H:%M:%S") except ValueError: diff --git a/tests/test_collector.py b/tests/test_collector.py index 3cbec8f..bcef1dd 100644 --- a/tests/test_collector.py +++ b/tests/test_collector.py @@ -79,6 +79,48 @@ class TestParseRfc5424: result = parse_rfc5424(line) assert result["attacker_ip"] == "Unknown" + def test_parses_line_with_real_procid(self): + """sshd/sudo log via native syslog, so rsyslog fills PROCID with the + real PID instead of NILVALUE. 
The parser must accept either form.""" + line = ( + "<38>1 2026-04-18T08:27:21.862365+00:00 omega-decky sshd 940 - - " + "Accepted password for root from 192.168.1.5 port 43210 ssh2" + ) + result = parse_rfc5424(line) + assert result is not None + assert result["decky"] == "omega-decky" + assert result["service"] == "sshd" + assert "Accepted password" in result["msg"] + assert result["attacker_ip"] == "Unknown" # no key=value in this msg + + def test_extracts_attacker_ip_from_msg_body_kv(self): + """SSH container's bash PROMPT_COMMAND uses `logger -t bash "CMD ... src=IP ..."` + which produces an RFC 5424 line with NILVALUE SD — the IP lives in the + free-form msg, not in SD params. The collector should still pick it up.""" + line = ( + "<134>1 2024-01-15T12:00:00+00:00 decky-01 bash - - - " + "CMD uid=0 user=root src=198.51.100.7 pwd=/root cmd=ls -la" + ) + result = parse_rfc5424(line) + assert result is not None + assert result["attacker_ip"] == "198.51.100.7" + # `fields` stays empty — the frontend's parseEventBody renders kv + # pairs straight from msg; we don't want duplicate pills. + assert result["fields"] == {} + assert "CMD uid=0" in result["msg"] + + def test_sd_ip_wins_over_msg_body(self): + """If SD params carry an IP, the msg-body fallback must not overwrite it.""" + line = ( + '<134>1 2024-01-15T12:00:00+00:00 decky-01 ssh - login ' + '[relay@55555 src_ip="1.2.3.4"] rogue src=9.9.9.9 entry' + ) + result = parse_rfc5424(line) + assert result["attacker_ip"] == "1.2.3.4" + # SD wins; `src=` from msg isn't folded into fields (msg retains it). + assert result["fields"]["src_ip"] == "1.2.3.4" + assert "src" not in result["fields"] + def test_parses_msg(self): line = self._make_line(msg="hello world") result = parse_rfc5424(line)