30 YAMLs for the shell/command rule cohort per Appendix B (rules/ttp/).

The cohort splits into two groups:
- engine-active (R0007-R0029): regex on command_text / raw_url / user_agent;
- lifter-bound (R0001-R0006, R0030): the v0 RuleEngine cannot count auth attempts, do identity rollups, or parse fingerprint blobs, so the BehavioralLifter / IdentityLifter / CredentialLifter consume these rules by rule_id at E.3.9 / E.3.13.

test_command_rules.py asserts:
- every rule id (R0001-R0030) has a YAML that compiles;
- lifter-bound rules NEVER fire from the v0 engine (regression guard against a YAML drifting into a regex match.spec);
- engine-active rules meet their Appendix-C precision target against the seed corpus (≥0.95 high-conf, ≥0.80 medium).

Conftest fixes:
- precision_engine moved to module scope so the module-scoped precomputed dispatch fixture (fired_by_label) can request it;
- _RULES_DIR path bumped from parents[2] to parents[3] so the loader resolves the project root regardless of pytest cwd;
- make_event synthesizes attacker_uuid so TTPTag's anchor invariant is satisfied.

Seed corpus broadened: positive examples for every regex rule, plus 6 negative examples across innocuous shell verbs (ls, echo, cd, ps, df, free), so FPs surface in precision rather than passing vacuously.
229 lines
7.2 KiB
Python
229 lines
7.2 KiB
Python
"""Fixtures for the per-rule precision suite.
|
|
|
|
Two halves:

* :func:`precision_engine` — async fixture that builds a real
  :class:`RuleEngine` populated from ``./rules/ttp/`` via
  :func:`_parse_and_compile`. We bypass ``RuleEngine.watch_store``
  (which would loop forever on the inotify subscription) and instead
  call ``_install`` directly per rule. The engine reads no rules
  through any store ABC method, so a stub store passes for
  construction.
* :func:`corpus_loader` — factory fixture returning labelled rows
  for a cohort (``commands`` / ``email`` / ``intel`` / ``canary`` /
  ``behavioral``). Prefers ``corpus/<name>.jsonl`` (operator-built,
  gitignored) and falls back to ``corpus/seed_<name>.jsonl``
  (synthetic, committed). If neither exists the fixture returns ``[]``
  and the precision tests :func:`pytest.skip` themselves — letting a
  fresh checkout exercise the harness without a corpus.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from collections.abc import Callable
|
|
from pathlib import Path
|
|
from typing import Any, NamedTuple
|
|
|
|
import pytest
|
|
import pytest_asyncio
|
|
|
|
from decnet.ttp.base import TaggerEvent
|
|
from decnet.ttp.impl.rule_engine import CompiledRule, RuleEngine
|
|
from decnet.ttp.store.base import RuleState
|
|
from decnet.ttp.store.impl.filesystem import _parse_and_compile
|
|
|
|
# Both locations are anchored on this file's resolved path, so they do not
# depend on the directory pytest is launched from.
# Rule YAMLs: three levels above this file's directory, then ``rules/ttp``.
_RULES_DIR = Path(__file__).resolve().parents[3].joinpath("rules", "ttp")
# Labelled corpora: a ``corpus`` directory sitting next to this file.
_CORPUS_DIR = Path(__file__).resolve().parent.joinpath("corpus")
|
|
|
|
|
|
class CorpusRow(NamedTuple):
    """A single labelled example from a precision corpus.

    Fields, in positional order:

    * ``source_kind`` — the event kind the row is replayed as.
    * ``payload`` — the keys the engine's match operator reads
      (``command_text`` for ``command``, ``raw_url`` for
      ``http_request``, etc.).
    * ``expected_rule_ids`` — human-labelled ground truth: the rules a
      competent analyst would expect to fire on this row. An empty
      tuple marks a negative example; negatives are load-bearing for
      precision because they are where false positives show up.
    * ``label`` — the row's name; ``precision_for`` uses it as the key
      into its ``fired`` mapping and ``make_event`` derives the
      synthetic ``attacker_uuid`` from it.
    """

    source_kind: str
    payload: dict[str, Any]
    expected_rule_ids: tuple[str, ...]
    label: str
|
|
|
|
|
|
class _StubStore:
    """Minimal stand-in for :class:`RuleStore`, enough for ``RuleEngine.__init__``.

    The precision fixture pushes rules straight into the engine's
    dispatch index, so none of these methods is expected to be called
    while the precision tests run; they exist only so construction
    succeeds.
    """

    async def load_compiled(self) -> list[CompiledRule]:
        """Report an empty rule set."""
        return []

    async def get_state(self, _rule_id: str) -> RuleState:
        """Return a default-constructed :class:`RuleState` for any rule id."""
        return RuleState()

    async def set_state(self, *_a: Any, **_kw: Any) -> None:
        """Accept any arguments and discard them."""
        return None

    def subscribe_changes(self) -> Any:
        """Return an async generator that finishes without yielding anything."""

        async def _no_changes() -> Any:
            return
            # Unreachable on purpose: the ``yield`` is what makes this
            # function an async generator rather than a coroutine.
            yield None  # pragma: no cover

        return _no_changes()
|
|
|
|
|
|
def _load_compiled_rules() -> list[CompiledRule]:
    """Compile every ``.yaml`` / ``.yml`` file directly under ``_RULES_DIR``.

    Files are visited in sorted path order and each is handed to
    :func:`_parse_and_compile` together with one shared default
    :class:`RuleState`. The session-scoped ``compiled_rules`` fixture
    calls this, so the work is done once per test session.

    A file that fails to parse or compile is skipped rather than
    raised: the cohort tests assert presence of their rule_id, so a bad
    YAML surfaces as a missing-rule failure instead of a confusing
    error out of the fixture. The skip is logged with the path and the
    exception so the cause of that missing rule can be found.

    Returns:
        The compiled rules, or ``[]`` when ``_RULES_DIR`` is not an
        existing directory.
    """
    import logging  # local import: keeps this block self-contained

    # is_dir() rather than exists(): a stray file at that path would
    # otherwise make iterdir() raise instead of yielding "no rules".
    if not _RULES_DIR.is_dir():
        return []
    log = logging.getLogger(__name__)
    state = RuleState()
    compiled: list[CompiledRule] = []
    for path in sorted(_RULES_DIR.iterdir()):
        if path.suffix not in {".yaml", ".yml"}:
            continue
        try:
            rule = _parse_and_compile(path, state)
        except Exception as exc:  # noqa: BLE001 — broken YAML is its own failure surface
            # Still best-effort, but no longer silent.
            log.warning("skipping rule file %s: %r", path, exc)
            continue
        compiled.append(rule)
    return compiled
|
|
|
|
|
|
@pytest.fixture(scope="session")
def compiled_rules() -> list[CompiledRule]:
    """Session-scoped fixture: whatever ``_load_compiled_rules`` returns."""
    rules = _load_compiled_rules()
    return rules
|
|
|
|
|
|
@pytest_asyncio.fixture(scope="module")
async def precision_engine(
    compiled_rules: list[CompiledRule],
) -> RuleEngine:
    """Module-scoped :class:`RuleEngine` with every compiled YAML rule installed.

    ``watch_store()`` is deliberately not used — it loops forever on
    the inotify subscription. Rules are installed one by one instead.
    The engine's public ``evaluate()`` reads only ``self._by_kind`` /
    ``self._by_rule``, and both are populated by those installs.
    """
    # The stub only has to satisfy the constructor; it is not a real RuleStore.
    rule_engine = RuleEngine(_StubStore())  # type: ignore[arg-type]
    for compiled in compiled_rules:
        rule_engine._install(compiled)
    return rule_engine
|
|
|
|
|
|
def _read_jsonl(path: Path) -> list[dict[str, Any]]:
    """Decode a JSON-Lines file into a list of rows.

    The file is read as UTF-8, one JSON document per line. Lines that
    are empty after stripping whitespace, and lines whose stripped text
    starts with ``#``, are skipped.

    Args:
        path: The ``.jsonl`` file to read.

    Returns:
        The decoded rows, in file order.

    Raises:
        json.JSONDecodeError: A non-skipped line is not valid JSON. The
            message is prefixed with ``<path>:<line number>:`` so the
            offending corpus row can be located.
    """
    rows: list[dict[str, Any]] = []
    with path.open("r", encoding="utf-8") as handle:
        for lineno, line in enumerate(handle, start=1):
            stripped = line.strip()
            if not stripped or stripped.startswith("#"):
                continue
            try:
                row = json.loads(stripped)
            except json.JSONDecodeError as exc:
                # Same exception type as before, so existing handlers
                # keep working; only the message gains file/line context.
                raise json.JSONDecodeError(
                    f"{path}:{lineno}: {exc.msg}", exc.doc, exc.pos
                ) from exc
            rows.append(row)
    return rows
|
|
|
|
|
|
def _resolve_corpus_path(name: str) -> Path | None:
    """Pick the corpus file for cohort *name*, or ``None`` if there is none.

    ``<name>.jsonl`` under ``_CORPUS_DIR`` wins when it exists;
    otherwise ``seed_<name>.jsonl`` is tried.
    """
    # Order matters: the first existing candidate is returned.
    for filename in (f"{name}.jsonl", f"seed_{name}.jsonl"):
        candidate = _CORPUS_DIR / filename
        if candidate.exists():
            return candidate
    return None
|
|
|
|
|
|
def _row_from_dict(raw: dict[str, Any]) -> CorpusRow:
    """Build a :class:`CorpusRow` from one decoded corpus record.

    Missing keys fall back to defaults: ``source_kind`` to
    ``"command"``, ``payload`` to an empty dict, ``expected_rule_ids``
    to an empty tuple and ``label`` to the empty string. The payload is
    copied into a new dict.
    """
    kind = str(raw.get("source_kind", "command"))
    payload = dict(raw.get("payload", {}))
    expected = tuple(raw.get("expected_rule_ids", []))
    label = str(raw.get("label", ""))
    return CorpusRow(
        source_kind=kind,
        payload=payload,
        expected_rule_ids=expected,
        label=label,
    )
|
|
|
|
|
|
@pytest.fixture(scope="session")
def corpus_loader() -> Callable[[str], list[CorpusRow]]:
    """Session-scoped factory fixture: cohort name in, labelled rows out.

    The returned callable looks for ``corpus/<name>.jsonl`` (real,
    gitignored) first, then ``corpus/seed_<name>.jsonl`` (synthetic,
    committed). When neither file exists it returns an empty list, and
    the calling tests are expected to skip.
    """

    def _rows_for(name: str) -> list[CorpusRow]:
        corpus_path = _resolve_corpus_path(name)
        if corpus_path is not None:
            return [_row_from_dict(record) for record in _read_jsonl(corpus_path)]
        return []

    return _rows_for
|
|
|
|
|
|
def make_event(row: CorpusRow, source_id: str = "src") -> TaggerEvent:
    """Turn a :class:`CorpusRow` into a :class:`TaggerEvent`.

    Corpus rows are per-payload fixtures and carry no attacker
    identity, so one is synthesised here: ``attacker_uuid`` is
    ``"corpus-"`` followed by the row's label, which makes it
    deterministic per row. This exists purely as test plumbing, so the
    downstream ``TTPTag`` constructor's "at least one of
    attacker_uuid/identity_uuid" invariant holds. ``identity_uuid``,
    ``session_id`` and ``decky_id`` are left as ``None``.

    Args:
        row: The corpus row supplying ``source_kind``, ``payload`` and
            ``label``.
        source_id: Passed through as the event's ``source_id``.
    """
    synthetic_attacker = f"corpus-{row.label}"
    return TaggerEvent(
        source_kind=row.source_kind,
        source_id=source_id,
        attacker_uuid=synthetic_attacker,
        identity_uuid=None,
        session_id=None,
        decky_id=None,
        payload=row.payload,
    )
|
|
|
|
|
|
def precision_for(
    rule_id: str,
    rows: list[CorpusRow],
    fired: dict[str, list[str]],
) -> tuple[float, int, int]:
    """Return ``(precision, tp, fp)`` for *rule_id* over *rows*.

    ``fired`` maps a row's label to the rule ids that matched that row;
    a label missing from ``fired`` counts as "nothing matched". Among
    the rows on which *rule_id* fired, those that list it in
    ``expected_rule_ids`` are true positives and the rest are false
    positives. Precision is ``tp / (tp + fp)``.

    When the rule fired on no row at all the result is
    ``(1.0, 0, 0)`` — vacuously perfect — so callers gate that case
    with their ``min_matches`` check before asserting on the value.
    """
    # Only rows where the rule actually fired contribute to precision.
    hits = [row for row in rows if rule_id in fired.get(row.label, [])]
    if not hits:
        return 1.0, 0, 0
    true_pos = sum(1 for row in hits if rule_id in row.expected_rule_ids)
    false_pos = len(hits) - true_pos
    return true_pos / len(hits), true_pos, false_pos
|
|
|
|
|
|
# Public names of this module; underscore-prefixed helpers are internal.
__all__ = [
    # Data model
    "CorpusRow",
    # Fixtures
    "compiled_rules",
    "precision_engine",
    "corpus_loader",
    # Plain helper functions
    "make_event",
    "precision_for",
]
|