feat(ttp): split bash CMD evidence into structured uid/user/src/pwd/cmd rows
The inspector was dumping the whole `CMD uid=0 user=root src=… pwd=…
cmd=nmap -p- 192.168.1.0/24` syslog body into a single ``command_text``
blob. ANTI: "I'd like to separate the fields." Done — three layers
work together:
1. Collector session aggregator: new `_parse_cmd_msg` splits the bash
PROMPT_COMMAND msg into `{uid, user, src, pwd, command}`. The
session-ended envelope's per-command dict now carries the
structured fields, with `command_text` set to just the cmd= value
(preserving embedded whitespace — `nmap -p- 1.2.3.0/24` etc.).
2. Rule engine: per-source_kind auxiliary evidence list
(`_AUX_EVIDENCE_FIELDS`). For `command` events the engine
automatically promotes uid/user/src/pwd into the persisted
`evidence` dict on top of the rule's explicit `evidence_fields`.
Engine-controlled, not per-rule — adding a new aux field is one
line here, not a 30-rule YAML sweep, and rule authors can't
accidentally drop it.
3. TTPInspector frontend: evidence renders as a structured
`kvs` grid (UID / USER / SRC / PWD / CMD rows) instead of
pretty-printed JSON. Primary-order list keeps shell fields at
the top; everything else falls below alphabetically so unfamiliar
evidence shapes still surface predictably.
Tests:
- session_aggregator pins the structured-fields emit (uid/user/src/
pwd/command_text without "CMD" prefix, embedded whitespace
preserved).
- rule_engine_tagger pins the aux-field auto-promotion + the
no-`None`-leakage path when payload doesn't carry an aux key.
This commit is contained in:
@@ -151,6 +151,33 @@ _SESSION_AGG_TTL_SEC: float = _parse_float_env(
|
||||
)
|
||||
|
||||
|
||||
# Body of a bash PROMPT_COMMAND CMD line:
|
||||
# ``CMD uid=0 user=root src=192.168.1.5 pwd=/root cmd=ls /var/www/html``
|
||||
# Splits into the structured fields the inspector renders + the
|
||||
# residual ``cmd=`` value (which may itself contain spaces — preserve
|
||||
# everything after ``cmd=`` as one token, do NOT word-split).
|
||||
_CMD_BODY_HEAD_KV_RE = re.compile(r'(\w+)=(\S+)')
# ``cmd=`` is the command delimiter only when it starts a token, i.e. at
# the start of the body or right after whitespace. A plain substring
# search would also hit ``cmd=`` embedded in an earlier value such as
# ``pwd=/tmp/cmd=x`` and cut the head short.
_CMD_BODY_CMD_SEP_RE = re.compile(r'(?<!\S)cmd=')


def _parse_cmd_msg(msg: str) -> dict[str, str]:
    """Split a bash CMD msg body into its ``key=value`` fields.

    Expected shape:
    ``CMD uid=0 user=root src=192.168.1.5 pwd=/root cmd=ls /var/www/html``

    Args:
        msg: Raw msg body of a parsed event.

    Returns:
        The empty dict when ``msg`` does not start with ``"CMD "``.
        Otherwise a dict holding every ``key=value`` pair found before
        the ``cmd=`` token (``uid``, ``user``, ``src``, ``pwd`` in the
        expected shape) plus ``command``, which carries everything after
        ``cmd=`` verbatim, embedded whitespace included. ``command`` is
        absent when the body has no ``cmd=`` token.

    Head values are matched as runs of non-whitespace, so a head value
    that itself contains a space is cut at that space.
    """
    if not msg.startswith("CMD "):
        return {}
    body = msg[4:]
    # Locate the first ``cmd=`` that begins a token; anything before it
    # is the head, anything after it is the command verbatim.
    sep = _CMD_BODY_CMD_SEP_RE.search(body)
    head = body if sep is None else body[:sep.start()]
    # On a repeated key the last occurrence wins, same as assigning the
    # pairs one by one.
    out: dict[str, str] = dict(_CMD_BODY_HEAD_KV_RE.findall(head))
    if sep is not None:
        out["command"] = body[sep.end():]
    return out
|
||||
|
||||
|
||||
def _parse_iso_ts(value: str) -> Optional[datetime]:
|
||||
"""Best-effort ISO-8601 parse for parsed event timestamps.
|
||||
|
||||
@@ -252,18 +279,30 @@ class _SessionAggregator:
|
||||
if cmd_ts.timestamp() < cutoff_lo:
|
||||
continue
|
||||
cmd_fields = cmd_parsed.get("fields", {}) or {}
|
||||
# Pull structured uid/user/src/pwd/command from the bash
|
||||
# msg body. The inspector renders these as separate
|
||||
# key/value rows, which is much friendlier than dumping
|
||||
# the raw ``CMD uid=0 user=... cmd=...`` string into a
|
||||
# single ``command_text`` blob.
|
||||
parsed_kv = _parse_cmd_msg(str(cmd_parsed.get("msg", "")))
|
||||
cmd_text = (
|
||||
cmd_fields.get("command")
|
||||
or cmd_fields.get("cmd")
|
||||
or parsed_kv.get("command")
|
||||
or cmd_parsed.get("msg", "")
|
||||
)
|
||||
commands.append({
|
||||
entry: dict[str, Any] = {
|
||||
"id": f"{sid}#{idx}" if sid else f"{attacker_ip}-{cmd_ts.isoformat()}",
|
||||
"command_text": str(cmd_text),
|
||||
"ts": cmd_ts.isoformat(),
|
||||
"decky": cmd_parsed.get("decky", ""),
|
||||
"service": cmd_parsed.get("service", ""),
|
||||
})
|
||||
}
|
||||
for key in ("uid", "user", "src", "pwd"):
|
||||
value = parsed_kv.get(key) or cmd_fields.get(key)
|
||||
if value is not None:
|
||||
entry[key] = value
|
||||
commands.append(entry)
|
||||
|
||||
payload: dict[str, Any] = {
|
||||
"session_id": sid or None,
|
||||
|
||||
@@ -296,6 +296,21 @@ _DEFAULT_MATCH_FIELD: dict[str, str] = {
|
||||
}
|
||||
|
||||
|
||||
# Per-``source_kind`` auxiliary evidence fields that the engine
|
||||
# auto-promotes onto every emitted tag, on top of the rule's
|
||||
# explicit ``evidence_fields`` list. The point is operator UX: when
|
||||
# a shell rule fires on ``cat /etc/shadow``, the inspector should
|
||||
# show *who* ran it (``user``), *where from* (``src``), *as whom*
|
||||
# (``uid``), and the working directory (``pwd``) — without forcing
|
||||
# every rule author to add the same four fields to every shell
|
||||
# rule's ``evidence_fields`` list. Engine-controlled, not per-rule:
|
||||
# adding a new aux field is a one-line edit here, not a 30-rule
|
||||
# YAML sweep.
|
||||
_AUX_EVIDENCE_FIELDS: dict[str, tuple[str, ...]] = {
|
||||
"command": ("uid", "user", "src", "pwd"),
|
||||
}
|
||||
|
||||
|
||||
def _evaluate_rules(
|
||||
rules: list[CompiledRule], event: TaggerEvent,
|
||||
) -> list[TTPTag]:
|
||||
@@ -330,6 +345,12 @@ def _evaluate_rules(
|
||||
for field in rule.evidence_fields
|
||||
if field in event.payload
|
||||
}
|
||||
# Engine-controlled auxiliary fields per source_kind —
|
||||
# added on top of the rule's explicit list so the
|
||||
# inspector always sees uid/user/src/pwd on shell tags.
|
||||
for aux in _AUX_EVIDENCE_FIELDS.get(event.source_kind, ()):
|
||||
if aux in event.payload and aux not in evidence:
|
||||
evidence[aux] = event.payload.get(aux)
|
||||
out.append(TTPTag(
|
||||
uuid=tag_uuid,
|
||||
source_kind=event.source_kind,
|
||||
|
||||
@@ -131,6 +131,36 @@
|
||||
overflow-y: auto;
|
||||
}
|
||||
|
||||
/* Two-column key/value grid used for per-tag evidence rows. */
.ttp-evidence-kvs {
  background: rgba(0, 0, 0, 0.35);
  border: 1px solid var(--border);
  border-radius: 3px;
  padding: 8px 10px;
  font-family: var(--mono, ui-monospace, monospace);
  font-size: 0.74rem;
  display: grid;
  /* Label column: at least 60px, growing to fit the longest label so
     uppercased keys longer than the short shell labels do not spill
     over the value column. The value column may shrink to zero so long
     values wrap inside it instead of widening the grid. */
  grid-template-columns: minmax(60px, max-content) minmax(0, 1fr);
  column-gap: 12px;
  row-gap: 3px;
  max-height: 320px;
  overflow-y: auto;
}
|
||||
|
||||
.ttp-evidence-k {
|
||||
color: var(--dim-color);
|
||||
letter-spacing: 1px;
|
||||
text-transform: uppercase;
|
||||
font-size: 0.66rem;
|
||||
align-self: baseline;
|
||||
padding-top: 2px;
|
||||
}
|
||||
|
||||
.ttp-evidence-v {
|
||||
color: var(--matrix);
|
||||
word-break: break-all;
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
.ttp-empty {
|
||||
padding: 24px;
|
||||
text-align: center;
|
||||
|
||||
@@ -158,8 +158,63 @@ const TTPInspector: React.FC<Props> = ({
|
||||
);
|
||||
};
|
||||
|
||||
// Evidence keys we promote to the top of the per-card key/value
|
||||
// table for shell-command tags. Order matters — these render in
|
||||
// the listed order; everything else goes after, alphabetically.
|
||||
const _EVIDENCE_PRIMARY_ORDER = [
|
||||
'uid', 'user', 'src', 'pwd', 'cmd', 'command', 'command_text',
|
||||
];
|
||||
|
||||
const _EVIDENCE_LABEL: Record<string, string> = {
|
||||
uid: 'UID',
|
||||
user: 'USER',
|
||||
src: 'SRC',
|
||||
pwd: 'PWD',
|
||||
cmd: 'CMD',
|
||||
command: 'CMD',
|
||||
command_text: 'CMD',
|
||||
};
|
||||
|
||||
interface EvidenceRow {
|
||||
key: string;
|
||||
label: string;
|
||||
value: string;
|
||||
}
|
||||
|
||||
/**
 * Turn an evidence object into display rows for the key/value grid.
 *
 * Keys listed in `_EVIDENCE_PRIMARY_ORDER` come first, in that order,
 * when present on `evidence`; all remaining keys follow, sorted. Each
 * row's label comes from `_EVIDENCE_LABEL`, falling back to the
 * upper-cased key.
 */
function flattenEvidence(evidence: Record<string, unknown>): EvidenceRow[] {
  // null/undefined render as an em dash; strings pass through; numbers
  // and booleans are stringified; anything else is JSON-encoded.
  const toText = (raw: unknown): string => {
    if (raw === null || raw === undefined) return '—';
    switch (typeof raw) {
      case 'string':
        return raw;
      case 'number':
      case 'boolean':
        return String(raw);
      default:
        return JSON.stringify(raw);
    }
  };

  const toRow = (name: string): EvidenceRow => ({
    key: name,
    label: _EVIDENCE_LABEL[name] ?? name.toUpperCase(),
    value: toText(evidence[name]),
  });

  // Pinned keys: present on the evidence object, first occurrence only.
  const pinned = _EVIDENCE_PRIMARY_ORDER.filter(
    (name, idx, all) => name in evidence && all.indexOf(name) === idx,
  );
  const pinnedSet = new Set<string>(pinned);
  const rest = Object.keys(evidence)
    .filter((name) => !pinnedSet.has(name))
    .sort();

  return pinned.concat(rest).map(toRow);
}
|
||||
|
||||
const TTPTagCard: React.FC<{ row: TTPTagDetailRow }> = ({ row }) => {
|
||||
const evidenceText = JSON.stringify(row.evidence ?? {}, null, 2);
|
||||
const evidenceRows = flattenEvidence(row.evidence ?? {});
|
||||
return (
|
||||
<div className="ttp-tag-card">
|
||||
<div className="ttp-card-head">
|
||||
@@ -186,7 +241,18 @@ const TTPTagCard: React.FC<{ row: TTPTagDetailRow }> = ({ row }) => {
|
||||
<div className="k">ATT&CK</div>
|
||||
<div className="v">{row.attack_release}</div>
|
||||
</div>
|
||||
<pre className="ttp-evidence">{evidenceText}</pre>
|
||||
{evidenceRows.length === 0 ? (
|
||||
<div className="ttp-empty" style={{ padding: '8px' }}>—</div>
|
||||
) : (
|
||||
<div className="ttp-evidence-kvs">
|
||||
{evidenceRows.map((r) => (
|
||||
<React.Fragment key={r.key}>
|
||||
<div className="ttp-evidence-k">{r.label}</div>
|
||||
<div className="ttp-evidence-v">{r.value}</div>
|
||||
</React.Fragment>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
@@ -31,6 +31,25 @@ def _cmd(ts_iso: str, text: str) -> dict[str, Any]:
|
||||
}
|
||||
|
||||
|
||||
def _raw_cmd(ts_iso: str, msg: str) -> dict[str, Any]:
    """Build a parsed ``command`` event whose bash CMD body sits in ``msg``.

    ``fields`` is left empty, mirroring what the unmodified collector
    parser produces for PROMPT_COMMAND lines (it deliberately keeps
    fields empty so the frontend pill rendering doesn't double-up). The
    aggregator is expected to pull uid/user/src/pwd/command out of the
    msg body instead.
    """
    origin: dict[str, Any] = {
        "decky": "SRV-DELTA-77",
        "service": "bash",
        "event_type": "command",
        "attacker_ip": _ATTACKER_IP,
    }
    return {"timestamp": ts_iso, **origin, "fields": {}, "msg": msg}
|
||||
|
||||
|
||||
def _session_recorded(
|
||||
ts_iso: str, sid: str, duration_s: float = 60.0,
|
||||
) -> dict[str, Any]:
|
||||
@@ -196,6 +215,36 @@ def test_ttl_eviction_drops_old_commands() -> None:
|
||||
assert remaining == ["fresh"]
|
||||
|
||||
|
||||
def test_session_emits_structured_uid_user_src_pwd_when_msg_carries_them(
    aggregator: _SessionAggregator,
    captured_publishes: list[tuple[str, dict[str, Any], str]],
) -> None:
    """The bash PROMPT_COMMAND msg body splits into structured fields.

    Pins the "inspector wants UID/SRC/PWD/CMD on separate rows"
    contract. Without this the inspector sees one big
    ``CMD uid=0 user=root src=… cmd=…`` string and operators have to
    eyeball the cmd= portion out of the prefix garbage.
    """
    aggregator.add_event(_raw_cmd(
        "2026-05-02T06:22:48",
        "CMD uid=0 user=root src=192.168.1.5 pwd=/root "
        "cmd=nmap -p- 192.168.1.0/24",
    ))
    aggregator.add_event(_session_recorded(
        "2026-05-02T06:23:00", sid="sess-x", duration_s=120.0,
    ))
    # Fail with a readable message rather than a bare IndexError when
    # nothing was published or the envelope carries no commands.
    assert captured_publishes, "expected a publish after the session_recorded event"
    payload = captured_publishes[0][1]
    assert payload["commands"], "published envelope carries no commands"
    cmd = payload["commands"][0]
    assert cmd["uid"] == "0"
    assert cmd["user"] == "root"
    assert cmd["src"] == "192.168.1.5"
    assert cmd["pwd"] == "/root"
    # ``command_text`` is the cmd= value, NOT the full "CMD uid=…" line.
    # nmap's command line carries spaces — we must preserve them.
    assert cmd["command_text"] == "nmap -p- 192.168.1.0/24"
|
||||
|
||||
|
||||
def test_publish_failure_is_swallowed() -> None:
|
||||
"""A blowing-up publish must not propagate into the stream thread."""
|
||||
def _bad(_t: str, _p: dict[str, Any], _e: str) -> None:
|
||||
|
||||
@@ -120,6 +120,65 @@ def test_get_tagger_includes_rule_engine_tagger_first(
|
||||
assert names[0] == "rule_engine"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_engine_auto_promotes_uid_user_src_pwd_into_evidence() -> None:
    """Evidence on a ``command`` tag carries uid/user/src/pwd.

    The rule is built with only a ``command_text`` match spec; the
    four shell-aux keys are expected to reach ``evidence`` because the
    event's ``source_kind`` is ``"command"``, not because the rule
    lists them.
    """
    expected = {
        "command_text": "cat /etc/shadow",
        "uid": "0",
        "user": "root",
        "src": "192.168.1.5",
        "pwd": "/root",
    }
    store = StubRuleStore(compiled=[
        _rule(match_spec={"field": "command_text", "pattern": r"\bcat\b"}),
    ])
    tagger = RuleEngineTagger(store)
    await tagger._engine._index.hydrate_from(store, predicate=_is_engine_owned)

    tags = await tagger.tag(TaggerEvent(
        source_kind="command",
        source_id="cmd-1",
        attacker_uuid="att-1",
        identity_uuid=None,
        session_id="sess-1",
        decky_id="omega-decky",
        # Hand the engine its own copy so ``expected`` stays pristine.
        payload=dict(expected),
    ))

    assert len(tags) == 1
    evidence = tags[0].evidence
    for key, want in expected.items():
        assert evidence[key] == want
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_engine_aux_fields_skip_missing_payload_keys() -> None:
    """Missing aux keys don't appear in evidence (no ``None`` values)."""
    rule = _rule(match_spec={"field": "command_text", "pattern": r"\bcat\b"})
    store = StubRuleStore(compiled=[rule])
    tagger = RuleEngineTagger(store)
    await tagger._engine._index.hydrate_from(store, predicate=_is_engine_owned)
    event = TaggerEvent(
        source_kind="command",
        source_id="cmd-1",
        attacker_uuid="att-1",
        identity_uuid=None,
        session_id=None,
        decky_id=None,
        payload={"command_text": "cat /etc/shadow"},
    )
    tags = await tagger.tag(event)
    # Same guard as the auto-promotion test: a rule that fails to fire
    # should read as "no tag", not as an IndexError on the next line.
    assert len(tags) == 1
    ev = tags[0].evidence
    assert ev == {"command_text": "cat /etc/shadow"}
|
||||
|
||||
|
||||
def test_rule_engine_tagger_is_in_iter_watchables() -> None:
|
||||
store = StubRuleStore()
|
||||
engine_tagger = RuleEngineTagger(store)
|
||||
|
||||
Reference in New Issue
Block a user