From d1c4a489630113f7591ea55aec522adea3649cf6 Mon Sep 17 00:00:00 2001
From: anti
Date: Sat, 2 May 2026 03:20:53 -0400
Subject: [PATCH] feat(ttp): split bash CMD evidence into structured uid/user/src/pwd/cmd rows
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The inspector was dumping the whole
`CMD uid=0 user=root src=… pwd=… cmd=nmap -p- 192.168.1.0/24`
syslog body into a single ``command_text`` blob. ANTI: "I'd like to
separate the fields." Done — three layers work together:

1. Collector session aggregator: new `_parse_cmd_msg` splits the bash
   PROMPT_COMMAND msg into `{uid, user, src, pwd, command}`. The
   session-ended envelope's per-command dict now carries the
   structured fields, with `command_text` set to just the cmd= value
   (preserving embedded whitespace — `nmap -p- 1.2.3.0/24` etc.).

2. Rule engine: per-source_kind auxiliary evidence list
   (`_AUX_EVIDENCE_FIELDS`). For `command` events the engine
   automatically promotes uid/user/src/pwd into the persisted
   `evidence` dict on top of the rule's explicit `evidence_fields`.
   Engine-controlled, not per-rule — adding a new aux field is one
   line here, not a 30-rule YAML sweep, and rule authors can't
   accidentally drop it.

3. TTPInspector frontend: evidence renders as a structured `kvs` grid
   (UID / USER / SRC / PWD / CMD rows) instead of pretty-printed JSON.
   Primary-order list keeps shell fields at the top; everything else
   falls below alphabetically so unfamiliar evidence shapes still
   surface predictably.

Tests:
- session_aggregator pins the structured-fields emit
  (uid/user/src/pwd/command_text without "CMD" prefix, embedded
  whitespace preserved).
- rule_engine_tagger pins the aux-field auto-promotion + the
  no-`None`-leakage path when payload doesn't carry an aux key.
--- decnet/collector/worker.py | 43 ++++++++++++- decnet/ttp/impl/rule_engine.py | 21 +++++++ decnet_web/src/components/TTPInspector.css | 30 ++++++++++ decnet_web/src/components/TTPInspector.tsx | 70 +++++++++++++++++++++- tests/collector/test_session_aggregator.py | 49 +++++++++++++++ tests/ttp/test_rule_engine_tagger.py | 59 ++++++++++++++++++ 6 files changed, 268 insertions(+), 4 deletions(-) diff --git a/decnet/collector/worker.py b/decnet/collector/worker.py index 351b36ee..e57b9853 100644 --- a/decnet/collector/worker.py +++ b/decnet/collector/worker.py @@ -151,6 +151,33 @@ _SESSION_AGG_TTL_SEC: float = _parse_float_env( ) +# Body of a bash PROMPT_COMMAND CMD line: +# ``CMD uid=0 user=root src=192.168.1.5 pwd=/root cmd=ls /var/www/html`` +# Splits into the structured fields the inspector renders + the +# residual ``cmd=`` value (which may itself contain spaces — preserve +# everything after ``cmd=`` as one token, do NOT word-split). +_CMD_BODY_HEAD_KV_RE = re.compile(r'(\w+)=(\S+)') + + +def _parse_cmd_msg(msg: str) -> dict[str, str]: + """Split a bash CMD msg body into ``{uid, user, src, pwd, command}``. + + Returns the empty dict on a non-CMD msg. ``command`` carries the + full post-``cmd=`` rest, including any embedded whitespace — + tools like ``nmap -p- 192.168.1.0/24`` would otherwise lose + everything after the first space. + """ + if not msg.startswith("CMD "): + return {} + head, sep, cmd_rest = msg[4:].partition("cmd=") + out: dict[str, str] = {} + for k, v in _CMD_BODY_HEAD_KV_RE.findall(head): + out[k] = v + if sep: + out["command"] = cmd_rest + return out + + def _parse_iso_ts(value: str) -> Optional[datetime]: """Best-effort ISO-8601 parse for parsed event timestamps. @@ -252,18 +279,30 @@ class _SessionAggregator: if cmd_ts.timestamp() < cutoff_lo: continue cmd_fields = cmd_parsed.get("fields", {}) or {} + # Pull structured uid/user/src/pwd/command from the bash + # msg body. 
The inspector renders these as separate + # key/value rows, which is much friendlier than dumping + # the raw ``CMD uid=0 user=... cmd=...`` string into a + # single ``command_text`` blob. + parsed_kv = _parse_cmd_msg(str(cmd_parsed.get("msg", ""))) cmd_text = ( cmd_fields.get("command") or cmd_fields.get("cmd") + or parsed_kv.get("command") or cmd_parsed.get("msg", "") ) - commands.append({ + entry: dict[str, Any] = { "id": f"{sid}#{idx}" if sid else f"{attacker_ip}-{cmd_ts.isoformat()}", "command_text": str(cmd_text), "ts": cmd_ts.isoformat(), "decky": cmd_parsed.get("decky", ""), "service": cmd_parsed.get("service", ""), - }) + } + for key in ("uid", "user", "src", "pwd"): + value = parsed_kv.get(key) or cmd_fields.get(key) + if value is not None: + entry[key] = value + commands.append(entry) payload: dict[str, Any] = { "session_id": sid or None, diff --git a/decnet/ttp/impl/rule_engine.py b/decnet/ttp/impl/rule_engine.py index 37e2bac4..b5e09fc3 100644 --- a/decnet/ttp/impl/rule_engine.py +++ b/decnet/ttp/impl/rule_engine.py @@ -296,6 +296,21 @@ _DEFAULT_MATCH_FIELD: dict[str, str] = { } +# Per-``source_kind`` auxiliary evidence fields that the engine +# auto-promotes onto every emitted tag, on top of the rule's +# explicit ``evidence_fields`` list. The point is operator UX: when +# a shell rule fires on ``cat /etc/shadow``, the inspector should +# show *who* ran it (``user``), *where from* (``src``), *as whom* +# (``uid``), and the working directory (``pwd``) — without forcing +# every rule author to add the same four fields to every shell +# rule's ``evidence_fields`` list. Engine-controlled, not per-rule: +# adding a new aux field is a one-line edit here, not a 30-rule +# YAML sweep. 
+_AUX_EVIDENCE_FIELDS: dict[str, tuple[str, ...]] = { + "command": ("uid", "user", "src", "pwd"), +} + + def _evaluate_rules( rules: list[CompiledRule], event: TaggerEvent, ) -> list[TTPTag]: @@ -330,6 +345,12 @@ def _evaluate_rules( for field in rule.evidence_fields if field in event.payload } + # Engine-controlled auxiliary fields per source_kind — + # added on top of the rule's explicit list so the + # inspector always sees uid/user/src/pwd on shell tags. + for aux in _AUX_EVIDENCE_FIELDS.get(event.source_kind, ()): + if aux in event.payload and aux not in evidence: + evidence[aux] = event.payload.get(aux) out.append(TTPTag( uuid=tag_uuid, source_kind=event.source_kind, diff --git a/decnet_web/src/components/TTPInspector.css b/decnet_web/src/components/TTPInspector.css index 1bf690fd..8a508829 100644 --- a/decnet_web/src/components/TTPInspector.css +++ b/decnet_web/src/components/TTPInspector.css @@ -131,6 +131,36 @@ overflow-y: auto; } +.ttp-evidence-kvs { + background: rgba(0, 0, 0, 0.35); + border: 1px solid var(--border); + border-radius: 3px; + padding: 8px 10px; + font-family: var(--mono, ui-monospace, monospace); + font-size: 0.74rem; + display: grid; + grid-template-columns: 60px 1fr; + column-gap: 12px; + row-gap: 3px; + max-height: 320px; + overflow-y: auto; +} + +.ttp-evidence-k { + color: var(--dim-color); + letter-spacing: 1px; + text-transform: uppercase; + font-size: 0.66rem; + align-self: baseline; + padding-top: 2px; +} + +.ttp-evidence-v { + color: var(--matrix); + word-break: break-all; + white-space: pre-wrap; +} + .ttp-empty { padding: 24px; text-align: center; diff --git a/decnet_web/src/components/TTPInspector.tsx b/decnet_web/src/components/TTPInspector.tsx index d1a7b1d4..b1eda3b9 100644 --- a/decnet_web/src/components/TTPInspector.tsx +++ b/decnet_web/src/components/TTPInspector.tsx @@ -158,8 +158,63 @@ const TTPInspector: React.FC = ({ ); }; +// Evidence keys we promote to the top of the per-card key/value +// table for shell-command 
tags. Order matters — these render in +// the listed order; everything else goes after, alphabetically. +const _EVIDENCE_PRIMARY_ORDER = [ + 'uid', 'user', 'src', 'pwd', 'cmd', 'command', 'command_text', +]; + +const _EVIDENCE_LABEL: Record = { + uid: 'UID', + user: 'USER', + src: 'SRC', + pwd: 'PWD', + cmd: 'CMD', + command: 'CMD', + command_text: 'CMD', +}; + +interface EvidenceRow { + key: string; + label: string; + value: string; +} + +function flattenEvidence(evidence: Record): EvidenceRow[] { + const seen = new Set(); + const rows: EvidenceRow[] = []; + const stringify = (v: unknown): string => { + if (v === null || v === undefined) return '—'; + if (typeof v === 'string') return v; + if (typeof v === 'number' || typeof v === 'boolean') return String(v); + return JSON.stringify(v); + }; + for (const k of _EVIDENCE_PRIMARY_ORDER) { + if (k in evidence && !seen.has(k)) { + seen.add(k); + rows.push({ + key: k, + label: _EVIDENCE_LABEL[k] ?? k.toUpperCase(), + value: stringify(evidence[k]), + }); + } + } + const remaining = Object.keys(evidence) + .filter((k) => !seen.has(k)) + .sort(); + for (const k of remaining) { + rows.push({ + key: k, + label: _EVIDENCE_LABEL[k] ?? k.toUpperCase(), + value: stringify(evidence[k]), + }); + } + return rows; +} + const TTPTagCard: React.FC<{ row: TTPTagDetailRow }> = ({ row }) => { - const evidenceText = JSON.stringify(row.evidence ?? {}, null, 2); + const evidenceRows = flattenEvidence(row.evidence ?? {}); return (
@@ -186,7 +241,18 @@ const TTPTagCard: React.FC<{ row: TTPTagDetailRow }> = ({ row }) => {
ATT&CK
{row.attack_release}
-
{evidenceText}
+ {evidenceRows.length === 0 ? ( +
+ ) : ( +
+ {evidenceRows.map((r) => ( + +
{r.label}
+
{r.value}
+
+ ))} +
+ )}
); }; diff --git a/tests/collector/test_session_aggregator.py b/tests/collector/test_session_aggregator.py index 783939a1..64c754b5 100644 --- a/tests/collector/test_session_aggregator.py +++ b/tests/collector/test_session_aggregator.py @@ -31,6 +31,25 @@ def _cmd(ts_iso: str, text: str) -> dict[str, Any]: } +def _raw_cmd(ts_iso: str, msg: str) -> dict[str, Any]: + """Parsed event whose bash CMD body is in ``msg``, fields={}. + + Mirrors what the unmodified collector parser produces for + PROMPT_COMMAND lines (the parser deliberately keeps fields empty + so the frontend pill rendering doesn't double-up). The aggregator + now extracts uid/user/src/pwd/command from that msg body. + """ + return { + "timestamp": ts_iso, + "decky": "SRV-DELTA-77", + "service": "bash", + "event_type": "command", + "attacker_ip": _ATTACKER_IP, + "fields": {}, + "msg": msg, + } + + def _session_recorded( ts_iso: str, sid: str, duration_s: float = 60.0, ) -> dict[str, Any]: @@ -196,6 +215,36 @@ def test_ttl_eviction_drops_old_commands() -> None: assert remaining == ["fresh"] +def test_session_emits_structured_uid_user_src_pwd_when_msg_carries_them( + aggregator: _SessionAggregator, + captured_publishes: list[tuple[str, dict[str, Any], str]], +) -> None: + """The bash PROMPT_COMMAND msg body splits into structured fields. + + Pins the "inspector wants UID/SRC/PWD/CMD on separate rows" + contract. Without this the inspector sees one big + ``CMD uid=0 user=root src=… cmd=…`` string and operators have to + eyeball the cmd= portion out of the prefix garbage. 
+ """ + aggregator.add_event(_raw_cmd( + "2026-05-02T06:22:48", + "CMD uid=0 user=root src=192.168.1.5 pwd=/root " + "cmd=nmap -p- 192.168.1.0/24", + )) + aggregator.add_event(_session_recorded( + "2026-05-02T06:23:00", sid="sess-x", duration_s=120.0, + )) + payload = captured_publishes[0][1] + cmd = payload["commands"][0] + assert cmd["uid"] == "0" + assert cmd["user"] == "root" + assert cmd["src"] == "192.168.1.5" + assert cmd["pwd"] == "/root" + # ``command_text`` is the cmd= value, NOT the full "CMD uid=…" line. + # nmap's command line carries spaces — we must preserve them. + assert cmd["command_text"] == "nmap -p- 192.168.1.0/24" + + def test_publish_failure_is_swallowed() -> None: """A blowing-up publish must not propagate into the stream thread.""" def _bad(_t: str, _p: dict[str, Any], _e: str) -> None: diff --git a/tests/ttp/test_rule_engine_tagger.py b/tests/ttp/test_rule_engine_tagger.py index 82bb76df..a8e5bbb1 100644 --- a/tests/ttp/test_rule_engine_tagger.py +++ b/tests/ttp/test_rule_engine_tagger.py @@ -120,6 +120,65 @@ def test_get_tagger_includes_rule_engine_tagger_first( assert names[0] == "rule_engine" +@pytest.mark.asyncio +async def test_engine_auto_promotes_uid_user_src_pwd_into_evidence() -> None: + """Shell-rule evidence should always carry uid/user/src/pwd. + + The rule's ``evidence_fields: [command_text]`` is unchanged; the + engine adds the four shell-aux keys when ``source_kind="command"`` + so the inspector renders structured rows without forcing every + rule author to repeat the same evidence_fields list. 
+ """ + rule = _rule(match_spec={"field": "command_text", "pattern": r"\bcat\b"}) + store = StubRuleStore(compiled=[rule]) + tagger = RuleEngineTagger(store) + await tagger._engine._index.hydrate_from(store, predicate=_is_engine_owned) + event = TaggerEvent( + source_kind="command", + source_id="cmd-1", + attacker_uuid="att-1", + identity_uuid=None, + session_id="sess-1", + decky_id="omega-decky", + payload={ + "command_text": "cat /etc/shadow", + "uid": "0", + "user": "root", + "src": "192.168.1.5", + "pwd": "/root", + }, + ) + tags = await tagger.tag(event) + assert len(tags) == 1 + ev = tags[0].evidence + assert ev["command_text"] == "cat /etc/shadow" + assert ev["uid"] == "0" + assert ev["user"] == "root" + assert ev["src"] == "192.168.1.5" + assert ev["pwd"] == "/root" + + +@pytest.mark.asyncio +async def test_engine_aux_fields_skip_missing_payload_keys() -> None: + """Missing aux keys don't appear in evidence (no ``None`` values).""" + rule = _rule(match_spec={"field": "command_text", "pattern": r"\bcat\b"}) + store = StubRuleStore(compiled=[rule]) + tagger = RuleEngineTagger(store) + await tagger._engine._index.hydrate_from(store, predicate=_is_engine_owned) + event = TaggerEvent( + source_kind="command", + source_id="cmd-1", + attacker_uuid="att-1", + identity_uuid=None, + session_id=None, + decky_id=None, + payload={"command_text": "cat /etc/shadow"}, + ) + tags = await tagger.tag(event) + ev = tags[0].evidence + assert ev == {"command_text": "cat /etc/shadow"} + + def test_rule_engine_tagger_is_in_iter_watchables() -> None: store = StubRuleStore() engine_tagger = RuleEngineTagger(store)