feat(ttp): split bash CMD evidence into structured uid/user/src/pwd/cmd rows

The inspector was dumping the whole `CMD uid=0 user=root src=… pwd=… cmd=nmap -p- 192.168.1.0/24` syslog body into a single ``command_text`` blob. ANTI: "I'd like to separate the fields." Done — three layers work together:

1. Collector session aggregator: new `_parse_cmd_msg` splits the bash PROMPT_COMMAND msg into `{uid, user, src, pwd, command}`. The session-ended envelope's per-command dict now carries the structured fields, with `command_text` set to just the cmd= value (preserving embedded whitespace — `nmap -p- 1.2.3.0/24` etc.).
2. Rule engine: per-source_kind auxiliary evidence list (`_AUX_EVIDENCE_FIELDS`). For `command` events the engine automatically promotes uid/user/src/pwd into the persisted `evidence` dict on top of the rule's explicit `evidence_fields`. Engine-controlled, not per-rule — adding a new aux field is one line here, not a 30-rule YAML sweep, and rule authors can't accidentally drop it.
3. TTPInspector frontend: evidence renders as a structured `kvs` grid (UID / USER / SRC / PWD / CMD rows) instead of pretty-printed JSON. Primary-order list keeps shell fields at the top; everything else falls below alphabetically so unfamiliar evidence shapes still surface predictably.

Tests:
- session_aggregator pins the structured-fields emit (uid/user/src/pwd/command_text without "CMD" prefix, embedded whitespace preserved).
- rule_engine_tagger pins the aux-field auto-promotion + the no-`None`-leakage path when payload doesn't carry an aux key.
2026-05-02 03:20:53 -04:00
parent 84699f89da
commit d1c4a48963
6 changed files with 268 additions and 4 deletions
--- a/decnet/collector/worker.py
+++ b/decnet/collector/worker.py
@@ -151,6 +151,33 @@ _SESSION_AGG_TTL_SEC: float = _parse_float_env(
 )


+# Body of a bash PROMPT_COMMAND CMD line:
+#   ``CMD uid=0 user=root src=192.168.1.5 pwd=/root cmd=ls /var/www/html``
+# This regex matches the whitespace-free ``key=value`` pairs in the
+# head (the part before ``cmd=``). The ``cmd=`` value may itself contain
+# spaces, so it is taken whole by the parser and never word-split.
+_CMD_BODY_HEAD_KV_RE = re.compile(r'(\w+)=(\S+)')
+
+
+def _parse_cmd_msg(msg: str) -> dict[str, str]:
+    """Split a bash CMD msg body into ``{uid, user, src, pwd, command}``.
+
+    Returns the empty dict on a non-CMD msg. ``command`` carries the
+    full post-``cmd=`` rest, including any embedded whitespace —
+    tools like ``nmap -p- 192.168.1.0/24`` would otherwise lose
+    everything after the first space.
+    """
+    if not msg.startswith("CMD "):
+        return {}
+    head, sep, cmd_rest = msg[4:].partition("cmd=")  # drop "CMD "; split at the FIRST "cmd="
+    out: dict[str, str] = {}
+    for k, v in _CMD_BODY_HEAD_KV_RE.findall(head):  # keeps every key=value token, not only uid/user/src/pwd
+        out[k] = v
+    if sep:  # empty sep means no "cmd=" marker was found, so "command" is omitted
+        out["command"] = cmd_rest
+    return out
+
+
 def _parse_iso_ts(value: str) -> Optional[datetime]:
    """Best-effort ISO-8601 parse for parsed event timestamps.

@@ -252,18 +279,30 @@ class _SessionAggregator:
            if cmd_ts.timestamp() < cutoff_lo:
                continue
            cmd_fields = cmd_parsed.get("fields", {}) or {}
+            # Pull structured uid/user/src/pwd/command from the bash
+            # msg body. The inspector renders these as separate
+            # key/value rows, which is much friendlier than dumping
+            # the raw ``CMD uid=0 user=... cmd=...`` string into a
+            # single ``command_text`` blob.
+            parsed_kv = _parse_cmd_msg(str(cmd_parsed.get("msg", "")))
            cmd_text = (
                cmd_fields.get("command")
                or cmd_fields.get("cmd")
+                or parsed_kv.get("command")
                or cmd_parsed.get("msg", "")
            )
-            commands.append({
+            entry: dict[str, Any] = {
                "id": f"{sid}#{idx}" if sid else f"{attacker_ip}-{cmd_ts.isoformat()}",
                "command_text": str(cmd_text),
                "ts": cmd_ts.isoformat(),
                "decky": cmd_parsed.get("decky", ""),
                "service": cmd_parsed.get("service", ""),
-            })
+            }
+            for key in ("uid", "user", "src", "pwd"):
+                value = parsed_kv.get(key) or cmd_fields.get(key)
+                if value is not None:
+                    entry[key] = value
+            commands.append(entry)

        payload: dict[str, Any] = {
            "session_id": sid or None,
--- a/decnet/ttp/impl/rule_engine.py
+++ b/decnet/ttp/impl/rule_engine.py
@@ -296,6 +296,21 @@ _DEFAULT_MATCH_FIELD: dict[str, str] = {
 }


+# Per-``source_kind`` auxiliary evidence fields that the engine
+# auto-promotes onto every emitted tag, on top of the rule's
+# explicit ``evidence_fields`` list (only keys present in the event
+# payload are copied, and a rule-listed key is never overwritten).
+# The point is operator UX: when a shell rule fires on
+# ``cat /etc/shadow``, the inspector should show *who* ran it
+# (``user``), *where from* (``src``), *as whom* (``uid``), and the
+# working directory (``pwd``) without every rule author repeating
+# those four fields. Engine-controlled, not per-rule: adding a new
+# aux field is a one-line edit here, not a 30-rule YAML sweep.
+_AUX_EVIDENCE_FIELDS: dict[str, tuple[str, ...]] = {
+    "command": ("uid", "user", "src", "pwd"),
+}
+
+
 def _evaluate_rules(
    rules: list[CompiledRule], event: TaggerEvent,
 ) -> list[TTPTag]:
@@ -330,6 +345,12 @@ def _evaluate_rules(
                    for field in rule.evidence_fields
                    if field in event.payload
                }
+                # Engine-controlled auxiliary fields per source_kind —
+                # added on top of the rule's explicit list so the
+                # inspector always sees uid/user/src/pwd on shell tags.
+                for aux in _AUX_EVIDENCE_FIELDS.get(event.source_kind, ()):
+                    if aux in event.payload and aux not in evidence:
+                        evidence[aux] = event.payload.get(aux)
                out.append(TTPTag(
                    uuid=tag_uuid,
                    source_kind=event.source_kind,
--- a/decnet_web/src/components/TTPInspector.css
+++ b/decnet_web/src/components/TTPInspector.css
@@ -131,6 +131,36 @@
  overflow-y: auto;
 }

+.ttp-evidence-kvs { /* two-column key/value grid that holds a tag's evidence rows */
+  background: rgba(0, 0, 0, 0.35);
+  border: 1px solid var(--border);
+  border-radius: 3px;
+  padding: 8px 10px;
+  font-family: var(--mono, ui-monospace, monospace);
+  font-size: 0.74rem;
+  display: grid;
+  grid-template-columns: 60px 1fr; /* fixed-width label column; value column takes the rest */
+  column-gap: 12px;
+  row-gap: 3px;
+  max-height: 320px; /* together with overflow-y: long evidence scrolls inside this box */
+  overflow-y: auto;
+}
+
+.ttp-evidence-k { /* label cell of an evidence row */
+  color: var(--dim-color);
+  letter-spacing: 1px;
+  text-transform: uppercase;
+  font-size: 0.66rem; /* smaller than the 0.74rem used by the grid container */
+  align-self: baseline;
+  padding-top: 2px;
+}
+
+.ttp-evidence-v { /* value cell of an evidence row */
+  color: var(--matrix);
+  word-break: break-all; /* allow breaks anywhere so long unspaced values do not overflow */
+  white-space: pre-wrap; /* keep whitespace in the value as written, but still wrap lines */
+}
+
 .ttp-empty {
  padding: 24px;
  text-align: center;
--- a/decnet_web/src/components/TTPInspector.tsx
+++ b/decnet_web/src/components/TTPInspector.tsx
@@ -158,8 +158,63 @@ const TTPInspector: React.FC<Props> = ({
  );
 };

+// Evidence keys we promote to the top of the per-card key/value
+// table for shell-command tags. Present keys render in the listed
+// order (absent ones are skipped); the rest follow, sorted by key.
+const _EVIDENCE_PRIMARY_ORDER = [
+  'uid', 'user', 'src', 'pwd', 'cmd', 'command', 'command_text',
+];
+
+const _EVIDENCE_LABEL: Record<string, string> = { // display label per evidence key; unlisted keys fall back to the upper-cased key
+  uid: 'UID',
+  user: 'USER',
+  src: 'SRC',
+  pwd: 'PWD',
+  cmd: 'CMD', // the three command spellings share one label, so evidence carrying
+  command: 'CMD', // more than one of them renders more than one CMD row
+  command_text: 'CMD',
+};
+
+interface EvidenceRow { // one rendered line of the evidence grid
+  key: string; // original evidence key; also used as the React key when rendering
+  label: string; // text shown in the label cell
+  value: string; // evidence value already converted to a display string
+}
+
+function flattenEvidence(evidence: Record<string, unknown>): EvidenceRow[] { // evidence object -> ordered display rows
+  const seen = new Set<string>(); // keys already emitted by the primary pass
+  const rows: EvidenceRow[] = [];
+  const stringify = (v: unknown): string => {
+    if (v === null || v === undefined) return '—'; // placeholder for a missing value
+    if (typeof v === 'string') return v;
+    if (typeof v === 'number' || typeof v === 'boolean') return String(v);
+    return JSON.stringify(v); // everything else (objects, arrays, ...) falls back to compact JSON
+  };
+  for (const k of _EVIDENCE_PRIMARY_ORDER) { // pass 1: known shell keys, in their fixed order
+    if (k in evidence && !seen.has(k)) {
+      seen.add(k);
+      rows.push({
+        key: k,
+        label: _EVIDENCE_LABEL[k] ?? k.toUpperCase(),
+        value: stringify(evidence[k]),
+      });
+    }
+  }
+  const remaining = Object.keys(evidence)
+    .filter((k) => !seen.has(k))
+    .sort(); // pass 2 input: every key not handled above, in default string sort order
+  for (const k of remaining) {
+    rows.push({
+      key: k,
+      label: _EVIDENCE_LABEL[k] ?? k.toUpperCase(), // unlisted keys get their upper-cased name
+      value: stringify(evidence[k]),
+    });
+  }
+  return rows;
+}
+
 const TTPTagCard: React.FC<{ row: TTPTagDetailRow }> = ({ row }) => {
-  const evidenceText = JSON.stringify(row.evidence ?? {}, null, 2);
+  const evidenceRows = flattenEvidence(row.evidence ?? {});
  return (
    <div className="ttp-tag-card">
      <div className="ttp-card-head">
@@ -186,7 +241,18 @@ const TTPTagCard: React.FC<{ row: TTPTagDetailRow }> = ({ row }) => {
        <div className="k">ATT&CK</div>
        <div className="v">{row.attack_release}</div>
      </div>
-      <pre className="ttp-evidence">{evidenceText}</pre>
+      {evidenceRows.length === 0 ? (
+        <div className="ttp-empty" style={{ padding: '8px' }}>—</div>
+      ) : (
+        <div className="ttp-evidence-kvs">
+          {evidenceRows.map((r) => (
+            <React.Fragment key={r.key}>
+              <div className="ttp-evidence-k">{r.label}</div>
+              <div className="ttp-evidence-v">{r.value}</div>
+            </React.Fragment>
+          ))}
+        </div>
+      )}
    </div>
  );
 };
--- a/tests/collector/test_session_aggregator.py
+++ b/tests/collector/test_session_aggregator.py
@@ -31,6 +31,25 @@ def _cmd(ts_iso: str, text: str) -> dict[str, Any]:
    }


+def _raw_cmd(ts_iso: str, msg: str) -> dict[str, Any]:
+    """Parsed event whose bash CMD body is in ``msg``, fields={}.
+
+    Mirrors what the unmodified collector parser produces for
+    PROMPT_COMMAND lines (the parser deliberately keeps fields empty
+    so the frontend pill rendering doesn't double-up). The aggregator
+    now extracts uid/user/src/pwd/command from that msg body.
+    """
+    return {
+        "timestamp": ts_iso,
+        "decky": "SRV-DELTA-77",
+        "service": "bash",
+        "event_type": "command",
+        "attacker_ip": _ATTACKER_IP,
+        "fields": {},  # empty on purpose: the aggregator must fall back to parsing ``msg``
+        "msg": msg,  # raw "CMD uid=... cmd=..." body under test
+    }
+
+
 def _session_recorded(
    ts_iso: str, sid: str, duration_s: float = 60.0,
 ) -> dict[str, Any]:
@@ -196,6 +215,36 @@ def test_ttl_eviction_drops_old_commands() -> None:
    assert remaining == ["fresh"]


+def test_session_emits_structured_uid_user_src_pwd_when_msg_carries_them(
+    aggregator: _SessionAggregator,
+    captured_publishes: list[tuple[str, dict[str, Any], str]],
+) -> None:
+    """The bash PROMPT_COMMAND msg body splits into structured fields.
+
+    Pins the "inspector wants UID/SRC/PWD/CMD on separate rows"
+    contract. Without this the inspector sees one big
+    ``CMD uid=0 user=root src=… cmd=…`` string and operators have to
+    eyeball the cmd= portion out of the prefix garbage.
+    """
+    aggregator.add_event(_raw_cmd(
+        "2026-05-02T06:22:48",
+        "CMD uid=0 user=root src=192.168.1.5 pwd=/root "
+        "cmd=nmap -p- 192.168.1.0/24",
+    ))
+    aggregator.add_event(_session_recorded(
+        "2026-05-02T06:23:00", sid="sess-x", duration_s=120.0,
+    ))
+    payload = captured_publishes[0][1]  # index 1 of each captured tuple is the payload dict
+    cmd = payload["commands"][0]  # only one command event was added above
+    assert cmd["uid"] == "0"  # values stay strings; nothing is cast to int
+    assert cmd["user"] == "root"
+    assert cmd["src"] == "192.168.1.5"
+    assert cmd["pwd"] == "/root"
+    # ``command_text`` is the cmd= value, NOT the full "CMD uid=…" line.
+    # nmap's command line carries spaces — we must preserve them.
+    assert cmd["command_text"] == "nmap -p- 192.168.1.0/24"
+
+
 def test_publish_failure_is_swallowed() -> None:
    """A blowing-up publish must not propagate into the stream thread."""
    def _bad(_t: str, _p: dict[str, Any], _e: str) -> None:
--- a/tests/ttp/test_rule_engine_tagger.py
+++ b/tests/ttp/test_rule_engine_tagger.py
@@ -120,6 +120,65 @@ def test_get_tagger_includes_rule_engine_tagger_first(
    assert names[0] == "rule_engine"


+@pytest.mark.asyncio
+async def test_engine_auto_promotes_uid_user_src_pwd_into_evidence() -> None:
+    """Shell-rule evidence should always carry uid/user/src/pwd.
+
+    The rule's ``evidence_fields: [command_text]`` is unchanged; the
+    engine adds the four shell-aux keys when ``source_kind="command"``
+    so the inspector renders structured rows without forcing every
+    rule author to repeat the same evidence_fields list.
+    """
+    rule = _rule(match_spec={"field": "command_text", "pattern": r"\bcat\b"})
+    store = StubRuleStore(compiled=[rule])
+    tagger = RuleEngineTagger(store)
+    await tagger._engine._index.hydrate_from(store, predicate=_is_engine_owned)  # presumably loads the stub rule into the engine index — confirm
+    event = TaggerEvent(
+        source_kind="command",  # the source kind the engine registers aux evidence fields for
+        source_id="cmd-1",
+        attacker_uuid="att-1",
+        identity_uuid=None,
+        session_id="sess-1",
+        decky_id="omega-decky",
+        payload={
+            "command_text": "cat /etc/shadow",
+            "uid": "0",  # the four aux keys below are NOT named by the rule itself
+            "user": "root",
+            "src": "192.168.1.5",
+            "pwd": "/root",
+        },
+    )
+    tags = await tagger.tag(event)
+    assert len(tags) == 1
+    ev = tags[0].evidence
+    assert ev["command_text"] == "cat /etc/shadow"
+    assert ev["uid"] == "0"
+    assert ev["user"] == "root"
+    assert ev["src"] == "192.168.1.5"
+    assert ev["pwd"] == "/root"
+
+
+@pytest.mark.asyncio
+async def test_engine_aux_fields_skip_missing_payload_keys() -> None:
+    """Missing aux keys don't appear in evidence (no ``None`` values)."""
+    rule = _rule(match_spec={"field": "command_text", "pattern": r"\bcat\b"})
+    store = StubRuleStore(compiled=[rule])
+    tagger = RuleEngineTagger(store)
+    await tagger._engine._index.hydrate_from(store, predicate=_is_engine_owned)  # presumably loads the stub rule into the engine index — confirm
+    event = TaggerEvent(
+        source_kind="command",
+        source_id="cmd-1",
+        attacker_uuid="att-1",
+        identity_uuid=None,
+        session_id=None,
+        decky_id=None,
+        payload={"command_text": "cat /etc/shadow"},  # no uid/user/src/pwd keys at all
+    )
+    tags = await tagger.tag(event)
+    ev = tags[0].evidence
+    assert ev == {"command_text": "cat /etc/shadow"}  # exact equality: no aux key may appear, not even as None
+
+
 def test_rule_engine_tagger_is_in_iter_watchables() -> None:
    store = StubRuleStore()
    engine_tagger = RuleEngineTagger(store)