Files
DECNET/decnet/ttp/impl/rule_engine.py
anti d1c4a48963 feat(ttp): split bash CMD evidence into structured uid/user/src/pwd/cmd rows
The inspector was dumping the whole `CMD uid=0 user=root src=… pwd=…
cmd=nmap -p- 192.168.1.0/24` syslog body into a single ``command_text``
blob. ANTI: "I'd like to separate the fields." Done — three layers
work together:

1. Collector session aggregator: new `_parse_cmd_msg` splits the bash
   PROMPT_COMMAND msg into `{uid, user, src, pwd, command}`. The
   session-ended envelope's per-command dict now carries the
   structured fields, with `command_text` set to just the cmd= value
   (preserving embedded whitespace — `nmap -p- 1.2.3.0/24` etc.).

2. Rule engine: per-source_kind auxiliary evidence list
   (`_AUX_EVIDENCE_FIELDS`). For `command` events the engine
   automatically promotes uid/user/src/pwd into the persisted
   `evidence` dict on top of the rule's explicit `evidence_fields`.
   Engine-controlled, not per-rule — adding a new aux field is one
   line here, not a 30-rule YAML sweep, and rule authors can't
   accidentally drop it.

3. TTPInspector frontend: evidence renders as a structured
   `kvs` grid (UID / USER / SRC / PWD / CMD rows) instead of
   pretty-printed JSON. Primary-order list keeps shell fields at
   the top; everything else falls below alphabetically so unfamiliar
   evidence shapes still surface predictably.

Tests:
- session_aggregator pins the structured-fields emit (uid/user/src/
  pwd/command_text without "CMD" prefix, embedded whitespace
  preserved).
- rule_engine_tagger pins the aux-field auto-promotion + the
  no-`None`-leakage path when payload doesn't carry an aux key.
2026-05-02 03:20:53 -04:00

437 lines
18 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Rule engine contract — `CompiledRule`, `RuleEngine`, `RuleSchema`.
Contract step E.1.5 of ``development/TTP_TAGGING.md``. The contract
commit was shape only — no real evaluation logic, no YAML parsing, no
dispatch. The implementation phase (E.3) has since filled in the bodies
in this file (regex ``pattern`` matching, evidence collection, tag
emission); the public surface is unchanged, so callers that compiled
against it during the contract phase (lifters E.1.6, worker E.1.7)
import without churn. YAML parsing still belongs to the store, not to
this module.
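Three contract classes live in this module (a fourth class, the
:class:`RuleEngineTagger` adapter, is documented on the class itself):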
* :class:`CompiledRule` — frozen, hashable record the engine evaluates
against. The store produces these after validating raw YAML through
:class:`RuleSchema` and stamping operational :class:`RuleState`.
* :class:`RuleSchema` — Pydantic model for raw YAML rule shape.
Operationally owned by the store (it reads disk and validates),
declared here per the file mapping in the design doc — keeping the
schema and the compiled record next to each other lets reviewers see
the YAML→runtime translation in one diff.
* :class:`RuleEngine` — consumes a :class:`RuleStore`, evaluates one
:class:`TaggerEvent` at a time. Hot-reload via
:meth:`RuleEngine.watch_store` swaps individual compiled rules in the
dispatch index atomically — never bulk-rebuilds.
The :class:`RuleStore` and :class:`RuleState` types arrive in E.1.11;
they are forward-referenced under :data:`TYPE_CHECKING` here so this
file is importable before that step lands.
"""
from __future__ import annotations
import re
from collections.abc import Iterator
from contextlib import contextmanager
from typing import TYPE_CHECKING, Any, NamedTuple
from pydantic import BaseModel, Field
from decnet import telemetry as _telemetry
from decnet.logging import get_logger
from decnet.ttp.base import Tagger, TaggerEvent
from decnet.ttp.impl._rule_index import RuleIndex
from decnet.ttp.impl._state import apply_ceiling, is_active
from decnet.web.db.models.ttp import TTPTag, compute_tag_uuid
if TYPE_CHECKING:
# Store contracts ship in E.1.11. Forward-referenced under
# TYPE_CHECKING so this module is importable in the contract phase
# without creating a circular shape dependency on a not-yet-shipped
# subpackage. Concrete construction happens at the worker layer
# (E.1.7) where both halves are in scope.
from decnet.ttp.store.base import RuleChange, RuleState, RuleStore
_log = get_logger("ttp.engine")
@contextmanager
def _span(name: str, **attrs: Any) -> Iterator[Any]:
    """Open a tracing span called *name*, or yield ``None`` when tracing is off.

    Gated on ``DECNET_DEVELOPER_TRACING`` through ``_telemetry._ENABLED``,
    following the same shape as the helpers in
    :mod:`decnet.ttp.store.impl.filesystem` /
    :mod:`decnet.ttp.store.impl.database` and the project's ``@traced`` /
    ``wrap_repository`` no-overhead-when-disabled pattern.

    Args:
        name: Span name handed to the tracer.
        **attrs: Attributes to stamp on the span before it is yielded.

    Yields:
        The active span when tracing is enabled, otherwise ``None``.
    """
    if _telemetry._ENABLED:
        # The tracer is fetched on every call (late-bound) so that the
        # test_tracing monkeypatch reaches this module.
        active_tracer = _telemetry.get_tracer("ttp.engine")
        with active_tracer.start_as_current_span(name) as current:
            for attr_key, attr_value in attrs.items():
                try:
                    current.set_attribute(attr_key, attr_value)
                except (TypeError, ValueError):
                    # Best effort: an attribute the span rejects is
                    # skipped rather than failing the traced operation.
                    pass
            yield current
    else:
        # Disabled path: one attribute lookup, then hand back ``None``.
        yield None
# ATT&CK release stamped on every emitted tag (passed as
# ``attack_release=`` to each ``TTPTag`` built in ``_evaluate_rules``).
# Pinned at the engine layer rather than per-rule because rule authors
# don't manage ATT&CK matrix drift; the engine owns that. Bumping this
# value invalidates tag UUID continuity across deploys, so the bump must
# land alongside a documented ATT&CK upgrade per TTP_TAGGING.md
# §"Hard parts §8".
# NOTE(review): the ``compute_tag_uuid(...)`` call in ``_evaluate_rules``
# does not receive this value — confirm where the UUID-continuity
# dependency actually comes from.
_ATTACK_RELEASE: str = "v15.1"
class CompiledRule(NamedTuple):
    """Runtime-ready representation of one YAML rule.

    Frozen by virtue of being a NamedTuple — the design doc's
    "atomic-swap concurrency" property (E.2.14b) requires that a rule
    in the dispatch index can never be torn mid-evaluate. NamedTuple
    rather than ``@dataclass(frozen=True)`` because instances are
    swapped *by replacement* and benefit from the cheaper allocator;
    `FrozenInstanceError` parity is preserved by the in-test smoke
    signal in E.2.14b.

    Fields mirror the YAML rule shape one-to-one except for ``state``,
    which the store stamps in at compile time after merging operational
    state (enabled / disabled / clipped, confidence ceiling, expiry).
    The engine therefore never reads :class:`RuleState` directly — it
    only consults the value attached to each :class:`CompiledRule`.

    NOTE(review): ``match_spec`` is a plain ``dict``, so calling
    ``hash()`` on a whole instance raises ``TypeError`` even though the
    other container fields are tuples / frozensets — confirm nothing
    relies on putting complete rules in a set or using them as dict keys.
    """

    #: Rule identifier; used in log messages and as an input to the tag UUID.
    rule_id: str
    #: Rule revision; fed into the tag UUID together with ``rule_id``.
    rule_version: int
    #: Display name taken from the YAML rule.
    name: str
    #: ``source_kind`` strings the rule is allowed to fire on. Frozen so
    #: it can live in a set / dispatch index key without copying.
    applies_to: frozenset[str]
    #: Opaque match spec — interpretation belongs to the engine impl
    #: phase (E.3). Kept ``dict[str, Any]`` here rather than typed so
    #: rule authors can extend match operators without touching the ABC.
    match_spec: dict[str, Any]
    #: ``((technique_id, sub_technique_id | None, tactic, confidence), ...)``
    #: per emit. Tuple-of-tuples, not list, so this field stays hashable.
    #: One YAML rule may emit N tags from a single match — see
    #: TTP_TAGGING.md §"One event maps to many techniques".
    emits: tuple[tuple[str, str | None, str, float], ...]
    #: Names of evidence keys the rule populates on emitted tags.
    evidence_fields: tuple[str, ...]
    #: Operational state stamped in by the store at compile time.
    state: "RuleState"
    #: Human-readable description from the YAML rule. Surfaced in the
    #: ``GET /api/v1/ttp/rules`` catalogue. Default empty so existing
    #: callers constructing ``CompiledRule`` (lifter unit tests) keep
    #: working without churn.
    description: str = ""
class RuleSchema(BaseModel):
    """Pydantic model for the raw YAML rule shape.

    Validation surface only — no runtime semantics. The store calls
    :meth:`model_validate` on each parsed YAML document; the engine
    never touches this class. Adding a new top-level rule field means
    adding it here AND extending :class:`CompiledRule` in the same
    commit, so the YAML→runtime mapping stays one-to-one.
    """

    #: Rule identifier, carried over to ``CompiledRule.rule_id``.
    rule_id: str
    #: Rule revision, carried over to ``CompiledRule.rule_version``.
    rule_version: int
    #: Display name of the rule.
    name: str
    #: Optional human-readable description; empty when the YAML omits it.
    description: str = ""
    #: ``source_kind`` strings the rule applies to (a list here; the
    #: compiled record declares the same field as a ``frozenset``).
    applies_to: list[str]
    #: Raw match spec; corresponds to ``CompiledRule.match_spec``.
    match: dict[str, Any]
    #: ``[{"tactic": "TA0007", "technique_id": "T1083",
    #: "sub_technique_id": "T1083.001"?, "confidence": 0.85}, ...]``
    #: Per-emit tactic + confidence ride here so a single rule can carry
    #: multiple precision targets (the "one event maps to many techniques"
    #: case from TTP_TAGGING.md, including different confidences per
    #: technique on the same match).
    emits: list[dict[str, Any]]
    #: Evidence keys to copy onto emitted tags; a fresh empty list per
    #: model instance when the YAML omits the key.
    evidence_fields: list[str] = Field(default_factory=list)
class RuleEngine:
    """Match :class:`TaggerEvent` instances against a store's compiled rules.

    The engine receives its store at construction time and never reads
    YAML itself. Rule lookup goes through a :class:`RuleIndex`;
    :meth:`watch_store` hands the store to that index, which applies
    ``store.subscribe_changes()`` events one rule at a time instead of
    bulk-rebuilding, so an edit to one rule cannot stall evaluation of
    unrelated rules.

    The public surface was fixed during the contract phase; callers
    that compiled against it then need no changes now that the methods
    delegate to :class:`RuleIndex` and the module-level evaluation
    helper.
    """

    def __init__(self, store: "RuleStore") -> None:
        """Remember *store* and start with an empty dispatch index."""
        self._store = store
        # The dispatch index lives in RuleIndex so per-source lifters
        # (E.3.9–E.3.13) share the same atomic-swap protocol. The legacy
        # ``_by_kind`` / ``_by_rule`` properties below forward to it for
        # callers (and tests) that still poke the index directly.
        self._index = RuleIndex()

    @property
    def _by_kind(self) -> dict[str, list[CompiledRule]]:
        """Legacy view of the index's ``source_kind`` → rules mapping."""
        return self._index._by_kind

    @_by_kind.setter
    def _by_kind(self, value: dict[str, list[CompiledRule]]) -> None:
        self._index._by_kind = value

    @property
    def _by_rule(self) -> dict[str, CompiledRule]:
        """Legacy view of the index's ``rule_id`` → rule mapping."""
        return self._index._by_rule

    @_by_rule.setter
    def _by_rule(self, value: dict[str, CompiledRule]) -> None:
        self._index._by_rule = value

    async def evaluate(self, event: TaggerEvent) -> list[TTPTag]:
        """Return the tags produced by every rule that matches *event*.

        Candidate rules are looked up by ``event.source_kind``. Each
        candidate's match spec is run against ``event.payload`` and one
        :class:`TTPTag` is emitted per ``emits`` entry, with the
        per-emit confidence passed through the rule state's ceiling.
        Inactive rules are skipped; the store auto-reverts expired
        states, and the engine re-checks as defense-in-depth.

        Returns:
            A possibly empty list of tags. When no rule is registered
            for the event's kind the result is ``[]`` and no tracing
            span is opened.
        """
        candidates = self._index.by_kind(event.source_kind)
        if candidates:
            span_attrs = {
                "attacker_uuid": str(event.attacker_uuid or ""),
                "identity_uuid": str(event.identity_uuid or ""),
                "source_kind": event.source_kind,
            }
            with _span("ttp.eval", **span_attrs):
                return _evaluate_rules(candidates, event)
        return []

    async def watch_store(self) -> None:
        """Follow the store's per-rule changes and swap them in atomically.

        Pure delegation to :meth:`RuleIndex.watch`, which loads the
        initial corpus from :meth:`RuleStore.load_compiled`, builds the
        dispatch index, then drains :meth:`RuleStore.subscribe_changes`
        forever. A ``definition`` change replaces the affected rule
        wholesale; a ``state`` change re-stamps the existing
        :class:`CompiledRule`'s ``state`` via NamedTuple ``_replace``
        (single dict assignment, no in-place mutation).
        """
        await self._index.watch(self._store)

    # ── Internals ───────────────────────────────────────────────────
    # Back-compat shims: the dispatch-index protocol now lives in
    # :class:`RuleIndex`. Callers / tests that still reach for
    # ``_install`` / ``_evict`` / ``_apply_change`` keep working.

    def _install(self, rule: CompiledRule) -> None:
        """Forward to :meth:`RuleIndex.install`."""
        self._index.install(rule)

    def _evict(self, rule_id: str) -> None:
        """Forward to :meth:`RuleIndex.evict`."""
        self._index.evict(rule_id)

    def _apply_change(
        self,
        change: "RuleChange",
        state_cls: type,
    ) -> None:
        """Forward to :meth:`RuleIndex.apply_change`."""
        self._index.apply_change(change, state_cls)
def _match_event(rule: CompiledRule, event: TaggerEvent) -> bool:
    """Run the rule's match spec against ``event.payload``.

    For v0 the only operator is ``pattern`` — a regex searched (not
    anchored) against one payload field. The field name comes from
    ``match_spec["field"]`` if present and truthy, otherwise from the
    per-source-kind default (``command_text`` for ``command``,
    ``raw_url`` for ``http_request``, etc.). A future PR can extend
    this to ``contains``, ``equals``, ``in_set`` without touching the
    engine surface — only this function changes.

    Args:
        rule: Compiled rule whose ``match_spec`` is consulted.
        event: Event whose ``payload`` supplies the text to search.

    Returns:
        ``True`` when the pattern is found in the selected field.
        ``False`` when the spec has no ``pattern``, no field can be
        resolved, the field value is missing or not a ``str``, or the
        pattern cannot be used as a regex. This function never raises
        for a bad pattern.
    """
    spec = rule.match_spec
    pattern = spec.get("pattern")
    if pattern is None:
        return False
    field = spec.get("field") or _default_field(event.source_kind)
    if field is None:
        return False
    haystack = event.payload.get(field)
    if not isinstance(haystack, str):
        return False
    try:
        return re.search(pattern, haystack) is not None
    except (re.error, TypeError):
        # ``re.error``: malformed regex made it past schema validation.
        # ``TypeError``: the YAML value is not a usable pattern at all
        # (number, list, bytes, ...). Either way, log and don't fire —
        # one broken rule must not abort evaluation of the remaining
        # rules for this event. The deploy-time hook (load_compiled)
        # catches most of these; this path is the runtime fallback.
        _log.warning(
            "ttp.engine: bad regex in rule %s: %r", rule.rule_id, pattern,
        )
        return False
# Per-source_kind default field for the ``pattern`` operator. A rule
# can override the choice via ``match.field`` in its YAML; kinds that
# are absent from this table have no default at all.
_DEFAULT_MATCH_FIELD: dict[str, str] = {
    "command": "command_text",
    "http_request": "raw_url",
    "email": "subject",
    "intel": "verdict",
    "canary_fingerprint": "ua_signature",
    "auth_attempt": "username",
    "payload": "payload_text",
}


def _default_field(source_kind: str) -> str | None:
    """Return the payload field matched by default for *source_kind*.

    Returns:
        The field name from ``_DEFAULT_MATCH_FIELD``, or ``None`` when
        the kind has no registered default.
    """
    return _DEFAULT_MATCH_FIELD.get(source_kind)
# Per-``source_kind`` auxiliary evidence fields that the engine
# auto-promotes onto every emitted tag, on top of the rule's
# explicit ``evidence_fields`` list. The point is operator UX: when
# a shell rule fires on ``cat /etc/shadow``, the inspector should
# show *who* ran it (``user``), *where from* (``src``), *as whom*
# (``uid``), and the working directory (``pwd``) — without forcing
# every rule author to add the same four fields to every shell
# rule's ``evidence_fields`` list. Engine-controlled, not per-rule:
# adding a new aux field is a one-line edit here, not a 30-rule
# YAML sweep.
#
# ``_evaluate_rules`` copies an aux key only when the payload actually
# carries it and the rule's own ``evidence_fields`` did not already
# supply it, so an absent key never surfaces as ``None`` and an
# explicit rule field always wins. Source kinds missing from this
# table get no auxiliary fields.
_AUX_EVIDENCE_FIELDS: dict[str, tuple[str, ...]] = {
    "command": ("uid", "user", "src", "pwd"),
}
def _evaluate_rules(
    rules: list[CompiledRule], event: TaggerEvent,
) -> list[TTPTag]:
    """Evaluate *rules* against *event* and build the resulting tags.

    Rules whose state is not active, or whose match spec does not match
    the event, are skipped. Every remaining rule contributes one
    :class:`TTPTag` per entry in ``rule.emits``, in rule order and then
    emit order.

    Args:
        rules: Candidate rules for the event's ``source_kind``.
        event: The event being tagged; ``event.payload`` supplies both
            the match input and the evidence values.

    Returns:
        The emitted tags; empty when nothing matched.
    """
    out: list[TTPTag] = []
    payload = event.payload
    # Engine-controlled auxiliary fields depend only on the event's
    # source_kind, so resolve them once for the whole evaluation.
    aux_fields = _AUX_EVIDENCE_FIELDS.get(event.source_kind, ())
    for rule in rules:
        if not is_active(rule.state):
            continue
        if not _match_event(rule, event):
            continue
        with _span(
            "ttp.rule.fire",
            rule_id=rule.rule_id,
            rule_version=rule.rule_version,
        ) as span:
            # Evidence is a function of (rule, event) only — build it
            # once per matching rule instead of once per emit. Keys the
            # payload lacks are omitted rather than stored as ``None``.
            evidence: dict[str, Any] = {
                field: payload[field]
                for field in rule.evidence_fields
                if field in payload
            }
            # Auxiliary fields are added on top of the rule's explicit
            # list so the inspector always sees uid/user/src/pwd on
            # shell tags; an explicit rule field takes precedence.
            for aux in aux_fields:
                if aux in payload and aux not in evidence:
                    evidence[aux] = payload[aux]
            for technique_id, sub_technique_id, tactic, base_conf in rule.emits:
                if span is not None:
                    # NOTE(review): the same attribute key is set on
                    # every emit, so presumably only the last emit's
                    # technique_id remains on the span — confirm that
                    # is intended.
                    try:
                        span.set_attribute("technique_id", technique_id)
                    except (TypeError, ValueError):
                        pass
                confidence = apply_ceiling(base_conf, rule.state)
                tag_uuid = compute_tag_uuid(
                    source_kind=event.source_kind,
                    source_id=event.source_id,
                    rule_id=rule.rule_id,
                    rule_version=rule.rule_version,
                    technique_id=technique_id,
                    sub_technique_id=sub_technique_id,
                )
                out.append(TTPTag(
                    uuid=tag_uuid,
                    source_kind=event.source_kind,
                    source_id=event.source_id,
                    attacker_uuid=event.attacker_uuid,
                    identity_uuid=event.identity_uuid,
                    session_id=event.session_id,
                    decky_id=event.decky_id,
                    tactic=tactic,
                    technique_id=technique_id,
                    sub_technique_id=sub_technique_id,
                    confidence=confidence,
                    rule_id=rule.rule_id,
                    rule_version=rule.rule_version,
                    # Each tag gets its own dict so mutating one tag's
                    # evidence can never leak into a sibling tag.
                    evidence=dict(evidence),
                    attack_release=_ATTACK_RELEASE,
                ))
    return out
def _is_engine_owned(rule: CompiledRule) -> bool:
    """Tell whether *rule* belongs to the generic engine rather than a lifter.

    Per-source lifters (Behavioral, Intel, …) mark their rules with
    ``match.kind: lifter:<name>_*``. Everything else — pure ``pattern``
    rules with "regex against a payload field" semantics and no
    cross-event state — is claimed by :class:`RuleEngineTagger`.

    Returns:
        ``False`` only when ``match_spec["kind"]`` is a string starting
        with ``"lifter:"``; ``True`` for a missing, non-string, or any
        other ``kind``.
    """
    declared_kind = rule.match_spec.get("kind", "")
    claimed_by_lifter = (
        isinstance(declared_kind, str) and declared_kind.startswith("lifter:")
    )
    return not claimed_by_lifter
class RuleEngineTagger(Tagger):
    """Tagger adapter that wires :class:`RuleEngine` into the composite.

    The composite tagger fans events out to its children by
    ``HANDLES``; without this adapter the canonical rule-based engine
    from §"Tagging engines, layered §1" of TTP_TAGGING.md never sees
    any traffic. This class is intentionally thin — all dispatch and
    hot-reload logic lives in :class:`RuleEngine` / :class:`RuleIndex`;
    we only translate between the ``Tagger.tag`` ABC and
    :meth:`RuleEngine.evaluate`, and route ``watch_store()`` through a
    predicate that excludes lifter-owned rules so the engine's
    dispatch index doesn't hold rules another tagger already claims.

    ``HANDLES`` enumerates the source kinds whose YAML rules typically
    live outside any per-source lifter — shell command rules
    (``command``), HTTP request pattern rules (``http_request``),
    auth attempts handled by raw regex rather than the
    :class:`CredentialLifter` cross-event counter, and generic
    ``payload`` matches. The composite uses this for routing; the
    engine itself filters by ``applies_to`` from the YAML.
    """

    # Tagger identifier for this adapter.
    name = "rule_engine"
    # Source kinds the composite routes to this tagger.
    HANDLES = frozenset({"command", "http_request", "auth_attempt", "payload"})

    def __init__(self, store: "RuleStore") -> None:
        """Build a private :class:`RuleEngine` over *store*.

        The store reference is also kept on the adapter because
        :meth:`watch_store` passes it to the engine's index directly.
        """
        self._engine = RuleEngine(store)
        self._store = store

    async def tag(self, event: TaggerEvent) -> list[TTPTag]:
        """Return the tags :meth:`RuleEngine.evaluate` produces for *event*."""
        return await self._engine.evaluate(event)

    async def watch_store(self) -> None:
        """Watch the store, admitting only engine-owned rules to the index.

        Calls the engine's :class:`RuleIndex` directly instead of
        :meth:`RuleEngine.watch_store`, because only the index call
        accepts the ``predicate`` argument used here.
        """
        # Filter to engine-owned rules so the dispatch index stays
        # disjoint from per-lifter ownership. Without the predicate
        # the engine would carry every lifter's rules too — they would
        # never match (no `pattern` operator), but they would inflate
        # the index and confuse tooling.
        await self._engine._index.watch(
            self._store, predicate=_is_engine_owned,
        )
# Public surface of this module. Underscore-prefixed helpers and
# constants defined above are deliberately left out.
__all__ = [
    "CompiledRule",
    "RuleEngine",
    "RuleEngineTagger",
    "RuleSchema",
]