feat(ttp): split bash CMD evidence into structured uid/user/src/pwd/cmd rows

The inspector was dumping the whole `CMD uid=0 user=root src=… pwd=… cmd=nmap -p- 192.168.1.0/24` syslog body into a single ``command_text`` blob. ANTI: "I'd like to separate the fields." Done — three layers work together:

1. Collector session aggregator: new `_parse_cmd_msg` splits the bash PROMPT_COMMAND msg into `{uid, user, src, pwd, command}`. The session-ended envelope's per-command dict now carries the structured fields, with `command_text` set to just the cmd= value (preserving embedded whitespace — `nmap -p- 1.2.3.0/24` etc.).
2. Rule engine: per-source_kind auxiliary evidence list (`_AUX_EVIDENCE_FIELDS`). For `command` events the engine automatically promotes uid/user/src/pwd into the persisted `evidence` dict on top of the rule's explicit `evidence_fields`. Engine-controlled, not per-rule — adding a new aux field is one line here, not a 30-rule YAML sweep, and rule authors can't accidentally drop it.
3. TTPInspector frontend: evidence renders as a structured `kvs` grid (UID / USER / SRC / PWD / CMD rows) instead of pretty-printed JSON. Primary-order list keeps shell fields at the top; everything else falls below alphabetically so unfamiliar evidence shapes still surface predictably.

Tests:
- session_aggregator pins the structured-fields emit (uid/user/src/pwd/command_text without "CMD" prefix, embedded whitespace preserved).
- rule_engine_tagger pins the aux-field auto-promotion + the no-`None`-leakage path when payload doesn't carry an aux key.
2026-05-02 03:20:53 -04:00
parent 84699f89da
commit d1c4a48963
6 changed files with 268 additions and 4 deletions
--- a/tests/collector/test_session_aggregator.py
+++ b/tests/collector/test_session_aggregator.py
@@ -31,6 +31,25 @@ def _cmd(ts_iso: str, text: str) -> dict[str, Any]:
    }


+def _raw_cmd(ts_iso: str, msg: str) -> dict[str, Any]:
+    """Return a bash ``command`` event carrying the raw CMD body in ``msg``.
+
+    Mirrors what the unmodified collector parser produces for
+    PROMPT_COMMAND lines: ``fields`` stays empty (deliberately, so the
+    frontend pill rendering doesn't double-up) and the whole
+    ``CMD uid=… cmd=…`` text rides in ``msg`` for the aggregator to split.
+    """
+    return {
+        "timestamp": ts_iso,
+        "decky": "SRV-DELTA-77",
+        "service": "bash",
+        "event_type": "command",
+        "attacker_ip": _ATTACKER_IP,
+        "fields": {},
+        "msg": msg,
+    }
+
+
 def _session_recorded(
    ts_iso: str, sid: str, duration_s: float = 60.0,
 ) -> dict[str, Any]:
@@ -196,6 +215,36 @@ def test_ttl_eviction_drops_old_commands() -> None:
    assert remaining == ["fresh"]


+def test_session_emits_structured_uid_user_src_pwd_when_msg_carries_them(
+    aggregator: _SessionAggregator,
+    captured_publishes: list[tuple[str, dict[str, Any], str]],
+) -> None:
+    """The bash PROMPT_COMMAND msg body splits into structured fields.
+
+    Pins the "inspector wants UID/SRC/PWD/CMD on separate rows"
+    contract: the first published payload's first command must expose
+    uid/user/src/pwd as separate keys. Without this the inspector sees
+    one big ``CMD uid=0 user=root src=… cmd=…`` string to pick apart.
+    """
+    aggregator.add_event(_raw_cmd(
+        "2026-05-02T06:22:48",
+        "CMD uid=0 user=root src=192.168.1.5 pwd=/root "
+        "cmd=nmap -p- 192.168.1.0/24",
+    ))
+    aggregator.add_event(_session_recorded(
+        "2026-05-02T06:23:00", sid="sess-x", duration_s=120.0,
+    ))
+    payload = captured_publishes[0][1]
+    cmd = payload["commands"][0]
+    assert cmd["uid"] == "0"
+    assert cmd["user"] == "root"
+    assert cmd["src"] == "192.168.1.5"
+    assert cmd["pwd"] == "/root"
+    # ``command_text`` holds only the cmd= value, not the "CMD uid=…" prefix.
+    # The nmap command line contains spaces, which must survive intact.
+    assert cmd["command_text"] == "nmap -p- 192.168.1.0/24"
+
+
 def test_publish_failure_is_swallowed() -> None:
    """A blowing-up publish must not propagate into the stream thread."""
    def _bad(_t: str, _p: dict[str, Any], _e: str) -> None: