feat(ttp): split bash CMD evidence into structured uid/user/src/pwd/cmd rows

The inspector was dumping the whole `CMD uid=0 user=root src=… pwd=…
cmd=nmap -p- 192.168.1.0/24` syslog body into a single ``command_text``
blob. ANTI: "I'd like to separate the fields." Done — three layers
work together:

1. Collector session aggregator: new `_parse_cmd_msg` splits the bash
   PROMPT_COMMAND msg into `{uid, user, src, pwd, command}`. The
   session-ended envelope's per-command dict now carries the
   structured fields, with `command_text` set to just the cmd= value
   (preserving embedded whitespace — `nmap -p- 1.2.3.0/24` etc.).

2. Rule engine: per-source_kind auxiliary evidence list
   (`_AUX_EVIDENCE_FIELDS`). For `command` events the engine
   automatically promotes uid/user/src/pwd into the persisted
   `evidence` dict on top of the rule's explicit `evidence_fields`.
   Engine-controlled, not per-rule — adding a new aux field is one
   line here, not a 30-rule YAML sweep, and rule authors can't
   accidentally drop it.

3. TTPInspector frontend: evidence renders as a structured
   `kvs` grid (UID / USER / SRC / PWD / CMD rows) instead of
   pretty-printed JSON. Primary-order list keeps shell fields at
   the top; everything else falls below alphabetically so unfamiliar
   evidence shapes still surface predictably.

Tests:
- session_aggregator pins the structured-fields emit (uid/user/src/
  pwd/command_text without "CMD" prefix, embedded whitespace
  preserved).
- rule_engine_tagger pins the aux-field auto-promotion + the
  no-`None`-leakage path when payload doesn't carry an aux key.
This commit is contained in:
2026-05-02 03:20:53 -04:00
parent 84699f89da
commit d1c4a48963
6 changed files with 268 additions and 4 deletions

View File

@@ -151,6 +151,33 @@ _SESSION_AGG_TTL_SEC: float = _parse_float_env(
)
# Body of a bash PROMPT_COMMAND CMD line:
# ``CMD uid=0 user=root src=192.168.1.5 pwd=/root cmd=ls /var/www/html``
# Splits into the structured fields the inspector renders + the
# residual ``cmd=`` value (which may itself contain spaces — preserve
# everything after ``cmd=`` as one token, do NOT word-split).
_CMD_BODY_HEAD_KV_RE = re.compile(r'(\w+)=(\S+)')
# Locates the ``cmd=`` key itself: it must open the body or follow
# whitespace, so a ``cmd=`` substring inside an earlier key or value
# (``pwd=/tmp/mycmd=x``, ``lastcmd=...``) is not mistaken for the
# start of the command field.
_CMD_BODY_CMD_KEY_RE = re.compile(r'(?:^|\s)cmd=')


def _parse_cmd_msg(msg: str) -> dict[str, str]:
    """Split a bash CMD msg body into its ``key=value`` fields.

    Every whitespace-free ``key=value`` pair that precedes the ``cmd=``
    key is returned under its own key (``uid``, ``user``, ``src``,
    ``pwd`` in the documented format). Everything after the first
    ``cmd=`` key is returned verbatim under ``command``, embedded
    whitespace included, so ``nmap -p- 192.168.1.0/24`` survives intact.

    Args:
        msg: The message body, expected to start with ``"CMD "``.

    Returns:
        The parsed fields. Empty when ``msg`` does not start with
        ``"CMD "``; ``command`` is absent when the body has no
        ``cmd=`` key.
    """
    if not msg.startswith("CMD "):
        return {}
    body = msg[4:]
    cmd_key = _CMD_BODY_CMD_KEY_RE.search(body)
    # Only the text before the ``cmd=`` key is scanned for pairs; the
    # command itself may legitimately contain ``x=y`` tokens.
    head = body if cmd_key is None else body[:cmd_key.start()]
    out: dict[str, str] = dict(_CMD_BODY_HEAD_KV_RE.findall(head))
    if cmd_key is not None:
        out["command"] = body[cmd_key.end():]
    return out
def _parse_iso_ts(value: str) -> Optional[datetime]:
"""Best-effort ISO-8601 parse for parsed event timestamps.
@@ -252,18 +279,30 @@ class _SessionAggregator:
if cmd_ts.timestamp() < cutoff_lo:
continue
cmd_fields = cmd_parsed.get("fields", {}) or {}
# Pull structured uid/user/src/pwd/command from the bash
# msg body. The inspector renders these as separate
# key/value rows, which is much friendlier than dumping
# the raw ``CMD uid=0 user=... cmd=...`` string into a
# single ``command_text`` blob.
parsed_kv = _parse_cmd_msg(str(cmd_parsed.get("msg", "")))
cmd_text = (
cmd_fields.get("command")
or cmd_fields.get("cmd")
or parsed_kv.get("command")
or cmd_parsed.get("msg", "")
)
commands.append({
entry: dict[str, Any] = {
"id": f"{sid}#{idx}" if sid else f"{attacker_ip}-{cmd_ts.isoformat()}",
"command_text": str(cmd_text),
"ts": cmd_ts.isoformat(),
"decky": cmd_parsed.get("decky", ""),
"service": cmd_parsed.get("service", ""),
})
}
for key in ("uid", "user", "src", "pwd"):
value = parsed_kv.get(key) or cmd_fields.get(key)
if value is not None:
entry[key] = value
commands.append(entry)
payload: dict[str, Any] = {
"session_id": sid or None,

View File

@@ -296,6 +296,21 @@ _DEFAULT_MATCH_FIELD: dict[str, str] = {
}
# Per-``source_kind`` auxiliary evidence fields that the engine
# auto-promotes onto every emitted tag, on top of the rule's
# explicit ``evidence_fields`` list. The point is operator UX: when
# a shell rule fires on ``cat /etc/shadow``, the inspector should
# show *who* ran it (``user``), *where from* (``src``), *as whom*
# (``uid``), and the working directory (``pwd``) — without forcing
# every rule author to add the same four fields to every shell
# rule's ``evidence_fields`` list. Engine-controlled, not per-rule:
# adding a new aux field is a one-line edit here, not a 30-rule
# YAML sweep.
# Keyed by ``source_kind``; each value is the tuple of payload keys
# promoted for that kind. A ``source_kind`` with no entry here gets no
# auxiliary fields.
_AUX_EVIDENCE_FIELDS: dict[str, tuple[str, ...]] = {
    "command": ("uid", "user", "src", "pwd"),
}
def _evaluate_rules(
rules: list[CompiledRule], event: TaggerEvent,
) -> list[TTPTag]:
@@ -330,6 +345,12 @@ def _evaluate_rules(
for field in rule.evidence_fields
if field in event.payload
}
# Engine-controlled auxiliary fields per source_kind —
# added on top of the rule's explicit list so the
# inspector always sees uid/user/src/pwd on shell tags.
for aux in _AUX_EVIDENCE_FIELDS.get(event.source_kind, ()):
if aux in event.payload and aux not in evidence:
evidence[aux] = event.payload.get(aux)
out.append(TTPTag(
uuid=tag_uuid,
source_kind=event.source_kind,

View File

@@ -131,6 +131,36 @@
overflow-y: auto;
}
/* Evidence rendered as a two-column label/value grid. */
.ttp-evidence-kvs {
  background: rgba(0, 0, 0, 0.35);
  border: 1px solid var(--border);
  border-radius: 3px;
  padding: 8px 10px;
  font-family: var(--mono, ui-monospace, monospace);
  font-size: 0.74rem;
  display: grid;
  /* The label column is at least 60px but grows to its widest label, so
     labels longer than UID/USER/SRC/PWD/CMD (any other evidence key is
     shown as its upper-cased name) do not spill into the value column.
     minmax(0, 1fr) lets the value column shrink below its content width
     instead of pushing the grid wider than its box. */
  grid-template-columns: minmax(60px, max-content) minmax(0, 1fr);
  column-gap: 12px;
  row-gap: 3px;
  max-height: 320px;
  overflow-y: auto;
}
/* Label cell: small, dim, upper-cased caption for one evidence key. */
.ttp-evidence-k {
  align-self: baseline;
  padding-top: 2px;
  font-size: 0.66rem;
  text-transform: uppercase;
  letter-spacing: 1px;
  color: var(--dim-color);
}
/* Value cell: keeps the value's own whitespace and line breaks, and
   breaks long unspaced strings at any character so they wrap. */
.ttp-evidence-v {
  white-space: pre-wrap;
  word-break: break-all;
  color: var(--matrix);
}
.ttp-empty {
padding: 24px;
text-align: center;

View File

@@ -158,8 +158,63 @@ const TTPInspector: React.FC<Props> = ({
);
};
// Evidence keys we promote to the top of the per-card key/value
// table for shell-command tags. Order matters — these render in
// the listed order; everything else goes after, alphabetically.
// `cmd`, `command` and `command_text` are all listed so that whichever
// spelling a tag's evidence carries is pinned after the shell context.
const _EVIDENCE_PRIMARY_ORDER = [
'uid', 'user', 'src', 'pwd', 'cmd', 'command', 'command_text',
];
// Display label per evidence key. The three command spellings share the
// `CMD` label; a key missing from this map is shown as its upper-cased
// name instead.
const _EVIDENCE_LABEL: Record<string, string> = {
uid: 'UID',
user: 'USER',
src: 'SRC',
pwd: 'PWD',
cmd: 'CMD',
command: 'CMD',
command_text: 'CMD',
};
// One rendered label/value pair of the evidence grid.
interface EvidenceRow {
// Original evidence key; also used as the React list key.
key: string;
// Caption shown in the label cell.
label: string;
// Evidence value already converted to display text.
value: string;
}
/**
 * Turn an evidence object into an ordered list of display rows.
 *
 * Keys named in `_EVIDENCE_PRIMARY_ORDER` come first, in that order;
 * every other key follows in default `sort()` order. Each key yields
 * exactly one row. Labels come from `_EVIDENCE_LABEL`, falling back to
 * the upper-cased key.
 */
function flattenEvidence(evidence: Record<string, unknown>): EvidenceRow[] {
  // null/undefined become an em dash, primitives their string form,
  // anything else its JSON text.
  const render = (raw: unknown): string => {
    switch (typeof raw) {
      case 'string':
        return raw;
      case 'number':
      case 'boolean':
        return String(raw);
      default:
        return raw === null || raw === undefined ? '—' : JSON.stringify(raw);
    }
  };

  const rows: EvidenceRow[] = [];
  const emitted = new Set<string>();
  // Appends the row for `key` unless that key already produced one.
  const emit = (key: string): void => {
    if (emitted.has(key)) {
      return;
    }
    emitted.add(key);
    rows.push({
      key,
      label: _EVIDENCE_LABEL[key] ?? key.toUpperCase(),
      value: render(evidence[key]),
    });
  };

  // Pinned keys first, then the rest; `emit` skips the already-pinned
  // ones when walking the sorted full key list.
  _EVIDENCE_PRIMARY_ORDER.filter((key) => key in evidence).forEach(emit);
  Object.keys(evidence).sort().forEach(emit);
  return rows;
}
const TTPTagCard: React.FC<{ row: TTPTagDetailRow }> = ({ row }) => {
const evidenceText = JSON.stringify(row.evidence ?? {}, null, 2);
const evidenceRows = flattenEvidence(row.evidence ?? {});
return (
<div className="ttp-tag-card">
<div className="ttp-card-head">
@@ -186,7 +241,18 @@ const TTPTagCard: React.FC<{ row: TTPTagDetailRow }> = ({ row }) => {
<div className="k">ATT&CK</div>
<div className="v">{row.attack_release}</div>
</div>
<pre className="ttp-evidence">{evidenceText}</pre>
{evidenceRows.length === 0 ? (
<div className="ttp-empty" style={{ padding: '8px' }}></div>
) : (
<div className="ttp-evidence-kvs">
{evidenceRows.map((r) => (
<React.Fragment key={r.key}>
<div className="ttp-evidence-k">{r.label}</div>
<div className="ttp-evidence-v">{r.value}</div>
</React.Fragment>
))}
</div>
)}
</div>
);
};

View File

@@ -31,6 +31,25 @@ def _cmd(ts_iso: str, text: str) -> dict[str, Any]:
}
def _raw_cmd(ts_iso: str, msg: str) -> dict[str, Any]:
    """Build a parsed bash ``command`` event whose CMD body lives only in ``msg``.

    ``fields`` is deliberately empty: per the original fixture note this
    mirrors what the unmodified collector parser produces for
    PROMPT_COMMAND lines, leaving the aggregator to extract
    uid/user/src/pwd/command from the msg body.
    """
    origin = {
        "decky": "SRV-DELTA-77",
        "service": "bash",
        "event_type": "command",
        "attacker_ip": _ATTACKER_IP,
    }
    return {"timestamp": ts_iso, **origin, "fields": {}, "msg": msg}
def _session_recorded(
ts_iso: str, sid: str, duration_s: float = 60.0,
) -> dict[str, Any]:
@@ -196,6 +215,36 @@ def test_ttl_eviction_drops_old_commands() -> None:
assert remaining == ["fresh"]
def test_session_emits_structured_uid_user_src_pwd_when_msg_carries_them(
    aggregator: _SessionAggregator,
    captured_publishes: list[tuple[str, dict[str, Any], str]],
) -> None:
    """A bash PROMPT_COMMAND msg body is emitted as separate fields.

    Pins the contract the inspector relies on to show UID/SRC/PWD/CMD
    on their own rows: the published per-command dict carries each
    shell-context value under its own key, and ``command_text`` holds
    only the ``cmd=`` value rather than the whole ``CMD uid=…`` line.
    """
    body = (
        "CMD uid=0 user=root src=192.168.1.5 pwd=/root "
        "cmd=nmap -p- 192.168.1.0/24"
    )
    aggregator.add_event(_raw_cmd("2026-05-02T06:22:48", body))
    session_end = _session_recorded(
        "2026-05-02T06:23:00", sid="sess-x", duration_s=120.0,
    )
    aggregator.add_event(session_end)

    first_command = captured_publishes[0][1]["commands"][0]
    expected_context = {
        "uid": "0",
        "user": "root",
        "src": "192.168.1.5",
        "pwd": "/root",
    }
    for key, expected in expected_context.items():
        assert first_command[key] == expected
    # The nmap command line contains spaces; all of it must survive.
    assert first_command["command_text"] == "nmap -p- 192.168.1.0/24"
def test_publish_failure_is_swallowed() -> None:
"""A blowing-up publish must not propagate into the stream thread."""
def _bad(_t: str, _p: dict[str, Any], _e: str) -> None:

View File

@@ -120,6 +120,65 @@ def test_get_tagger_includes_rule_engine_tagger_first(
assert names[0] == "rule_engine"
@pytest.mark.asyncio
async def test_engine_auto_promotes_uid_user_src_pwd_into_evidence() -> None:
    """A ``command`` tag's evidence carries uid/user/src/pwd automatically.

    The rule matches on ``command_text``; the four shell-context keys
    present in the payload are expected in the evidence because the
    engine adds them for ``source_kind="command"``, so rule authors do
    not have to repeat them in every rule's ``evidence_fields``.
    """
    expected = {
        "command_text": "cat /etc/shadow",
        "uid": "0",
        "user": "root",
        "src": "192.168.1.5",
        "pwd": "/root",
    }
    store = StubRuleStore(compiled=[
        _rule(match_spec={"field": "command_text", "pattern": r"\bcat\b"}),
    ])
    tagger = RuleEngineTagger(store)
    await tagger._engine._index.hydrate_from(store, predicate=_is_engine_owned)

    # The event gets its own copy so the expectations cannot be altered
    # by anything the tagger does to the payload.
    tags = await tagger.tag(TaggerEvent(
        source_kind="command",
        source_id="cmd-1",
        attacker_uuid="att-1",
        identity_uuid=None,
        session_id="sess-1",
        decky_id="omega-decky",
        payload=dict(expected),
    ))

    assert len(tags) == 1
    evidence = tags[0].evidence
    for key, value in expected.items():
        assert evidence[key] == value
@pytest.mark.asyncio
async def test_engine_aux_fields_skip_missing_payload_keys() -> None:
    """Aux keys absent from the payload stay out of the evidence.

    The payload carries only ``command_text``, so the evidence must be
    exactly that one key: no uid/user/src/pwd entries and no ``None``
    placeholders.
    """
    rule = _rule(match_spec={"field": "command_text", "pattern": r"\bcat\b"})
    store = StubRuleStore(compiled=[rule])
    tagger = RuleEngineTagger(store)
    await tagger._engine._index.hydrate_from(store, predicate=_is_engine_owned)
    event = TaggerEvent(
        source_kind="command",
        source_id="cmd-1",
        attacker_uuid="att-1",
        identity_uuid=None,
        session_id=None,
        decky_id=None,
        payload={"command_text": "cat /etc/shadow"},
    )
    tags = await tagger.tag(event)
    # Same guard as the auto-promotion test: a no-match regression should
    # fail here as an assertion, not as an IndexError on ``tags[0]``.
    assert len(tags) == 1
    ev = tags[0].evidence
    assert ev == {"command_text": "cat /etc/shadow"}
def test_rule_engine_tagger_is_in_iter_watchables() -> None:
store = StubRuleStore()
engine_tagger = RuleEngineTagger(store)