feat(ttp): E.3.8 corpus + harness — labelled holdout fixture
Sub-step preceding the rule-pack commits per TTP_TAGGING.md:2967. Adds the per-rule precision suite scaffolding under tests/ttp/rule_precision/: - conftest.py: precision_engine fixture (RuleEngine populated from ./rules/ttp/), corpus_loader (real → seed → empty fallback), precision_for() helper for TP/FP accounting. - _build_corpus.py: extractor for a real prod corpus pull. Mandatory --exclude-ip / DECNET_TTP_CORPUS_EXCLUDE_IPS — operator IPs never end up in the committed exclusion list. Pulls both 'command' and 'unknown_command' event types. - corpus/seed_*.jsonl: synthetic seed rows for each cohort so the harness exercises in clean checkouts. - corpus/*.jsonl (operator-built) is gitignored. - test_corpus_loads.py: sentinel that every seed file parses.
This commit is contained in:
220
tests/ttp/rule_precision/conftest.py
Normal file
220
tests/ttp/rule_precision/conftest.py
Normal file
@@ -0,0 +1,220 @@
|
||||
"""Fixtures for the per-rule precision suite.
|
||||
|
||||
Two halves:
|
||||
|
||||
* :func:`precision_engine` — async fixture that builds a real
|
||||
:class:`RuleEngine` populated from ``./rules/ttp/`` via
|
||||
:func:`_parse_and_compile`. We bypass ``RuleEngine.watch_store``
|
||||
(which would loop forever on the inotify subscription) and instead
|
||||
call ``_install`` directly per rule. The engine reads no rules
|
||||
through any store ABC method, so a stub store passes for
|
||||
construction.
|
||||
* :func:`corpus_loader` — factory fixture returning labelled rows
|
||||
for a cohort (``commands`` / ``email`` / ``intel`` / ``canary`` /
|
||||
``behavioral``). Prefers ``corpus/<name>.jsonl`` (operator-built,
|
||||
gitignored) and falls back to ``corpus/seed_<name>.jsonl``
|
||||
(synthetic, committed). If neither exists the fixture returns ``[]``
|
||||
and the precision tests :func:`pytest.skip` themselves — letting a
|
||||
fresh checkout exercise the harness without a corpus.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from collections.abc import Callable
|
||||
from pathlib import Path
|
||||
from typing import Any, NamedTuple
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
from decnet.ttp.base import TaggerEvent
|
||||
from decnet.ttp.impl.rule_engine import CompiledRule, RuleEngine
|
||||
from decnet.ttp.store.base import RuleState
|
||||
from decnet.ttp.store.impl.filesystem import _parse_and_compile
|
||||
|
||||
def _locate_rules_dir(anchor: Path) -> Path:
    """Return the ``rules/ttp`` directory that applies to *anchor*.

    Walks *anchor*'s ancestors from nearest to farthest and returns the
    first ``<ancestor>/rules/ttp`` that exists as a directory.  A fixed
    ``parents[2]`` index only reaches ``tests/`` when this file sits at
    ``tests/ttp/rule_precision/conftest.py`` — one level short of the
    repository root that ``./rules/ttp/`` refers to — so the rule packs
    would silently never be loaded.

    When no ancestor holds such a directory, the historical
    ``parents[2] / "rules" / "ttp"`` location is returned unchanged
    (it need not exist; callers check for existence themselves).
    """
    ancestors = anchor.parents
    for ancestor in ancestors:
        candidate = ancestor / "rules" / "ttp"
        if candidate.is_dir():
            return candidate
    # Clamp the index so an unusually shallow path cannot raise IndexError.
    legacy_base = ancestors[min(2, len(ancestors) - 1)]
    return legacy_base / "rules" / "ttp"


# Directory holding the YAML rule packs compiled for the precision suite.
_RULES_DIR = _locate_rules_dir(Path(__file__).resolve())
# Directory next to this file holding the labelled JSONL corpora.
_CORPUS_DIR = Path(__file__).resolve().parent / "corpus"
|
||||
|
||||
|
||||
class CorpusRow(NamedTuple):
    """A single hand-labelled entry of a cohort corpus.

    Each row pairs an event payload with the rule ids a competent
    analyst would expect to fire on it.  Rows with an empty
    ``expected_rule_ids`` are deliberate negative examples: any rule
    that fires on them is counted as a false positive, which is what
    makes the precision figure meaningful.
    """

    # Event kind the row is materialised as (e.g. ``command``,
    # ``http_request``).
    source_kind: str
    # Keys the engine's match operator reads — ``command_text`` for
    # ``command``, ``raw_url`` for ``http_request``, etc.
    payload: dict[str, Any]
    # Human-labelled ground truth: rule ids expected to fire on this row.
    expected_rule_ids: tuple[str, ...]
    # Row identifier; ``precision_for`` uses it to look up what fired.
    label: str
|
||||
|
||||
|
||||
class _StubStore:
    """Minimal stand-in for a rule store, used only to construct the engine.

    The precision fixture pushes rules straight into the engine, so the
    methods below exist purely to satisfy the constructor's expectations
    and all return empty / default values.
    """

    async def load_compiled(self) -> list[CompiledRule]:
        """Report that the store holds no compiled rules."""
        return list()

    async def get_state(self, _rule_id: str) -> RuleState:
        """Return a default-constructed state regardless of the rule id."""
        default_state = RuleState()
        return default_state

    async def set_state(self, *_a: Any, **_kw: Any) -> None:
        """Accept and discard any state update."""

    def subscribe_changes(self) -> Any:
        """Return an async generator that finishes without yielding."""

        async def _no_changes() -> Any:
            return
            # Unreachable; its presence is what makes this an async generator.
            yield  # pragma: no cover

        return _no_changes()
|
||||
|
||||
|
||||
def _load_compiled_rules() -> list[CompiledRule]:
    """Compile each ``.yaml`` / ``.yml`` file directly inside ``_RULES_DIR``.

    Files are visited in sorted order and all compiled against one
    shared default :class:`RuleState`.  A missing rules directory yields
    an empty list.

    Files that fail to compile are skipped on purpose: the cohort tests
    assert that their rule_id is present, so a broken YAML shows up as a
    missing-rule failure instead of an opaque error raised from inside
    a fixture.
    """
    compiled: list[CompiledRule] = []
    if not _RULES_DIR.exists():
        return compiled

    shared_state = RuleState()
    yaml_suffixes = {".yaml", ".yml"}
    for rule_file in sorted(_RULES_DIR.iterdir()):
        if rule_file.suffix in yaml_suffixes:
            try:
                rule = _parse_and_compile(rule_file, shared_state)
            except Exception:  # noqa: BLE001 — broken YAML is its own failure surface
                continue
            else:
                compiled.append(rule)
    return compiled
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def compiled_rules() -> list[CompiledRule]:
    """Session-scoped list of every rule :func:`_load_compiled_rules` compiled."""
    rules = _load_compiled_rules()
    return rules
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def precision_engine(
    compiled_rules: list[CompiledRule],
) -> RuleEngine:
    """Build a :class:`RuleEngine` holding every compiled YAML rule.

    The engine is constructed over a :class:`_StubStore` and each rule
    is handed to ``engine._install`` directly.  ``watch_store()`` is
    skipped on purpose: per the module notes it never returns because
    it waits on the store's change subscription, and ``_install`` is
    what fills the ``_by_kind`` / ``_by_rule`` indexes that
    ``evaluate()`` reads.
    """
    stub = _StubStore()
    engine = RuleEngine(stub)  # type: ignore[arg-type]
    for compiled in compiled_rules:
        engine._install(compiled)
    return engine
|
||||
|
||||
|
||||
def _read_jsonl(path: Path) -> list[dict[str, Any]]:
    """Parse a UTF-8 JSON Lines file into a list of decoded objects.

    Surrounding whitespace is stripped from every line; blank lines and
    lines starting with ``#`` are skipped.  A line that is not valid
    JSON lets :class:`json.JSONDecodeError` propagate.
    """
    with path.open("r", encoding="utf-8") as stream:
        candidates = (raw_line.strip() for raw_line in stream)
        return [
            json.loads(text)
            for text in candidates
            if text and not text.startswith("#")
        ]
|
||||
|
||||
|
||||
def _resolve_corpus_path(name: str) -> Path | None:
    """Pick the corpus file to use for cohort *name*.

    ``<name>.jsonl`` in ``_CORPUS_DIR`` wins when it exists; otherwise
    ``seed_<name>.jsonl`` is used.  Returns ``None`` when neither file
    is present.
    """
    for filename in (f"{name}.jsonl", f"seed_{name}.jsonl"):
        candidate = _CORPUS_DIR / filename
        if candidate.exists():
            return candidate
    return None
|
||||
|
||||
|
||||
def _row_from_dict(raw: dict[str, Any]) -> CorpusRow:
    """Convert one decoded JSONL object into a :class:`CorpusRow`.

    Missing keys take defaults: ``source_kind`` → ``"command"``,
    ``payload`` → ``{}``, ``expected_rule_ids`` → ``()``, ``label`` →
    ``""``.  An explicit JSON ``null`` is treated the same as a missing
    key instead of crashing or turning into the string ``"None"``.

    ``expected_rule_ids`` may be a list of ids or a single id string; a
    bare string is wrapped as a one-element tuple rather than being
    split into individual characters.
    """
    source_kind = raw.get("source_kind")
    label = raw.get("label")

    expected = raw.get("expected_rule_ids")
    if expected is None:
        expected_ids: tuple[str, ...] = ()
    elif isinstance(expected, str):
        # tuple("R0001") would yield ('R', '0', '0', '0', '1').
        expected_ids = (expected,)
    else:
        expected_ids = tuple(expected)

    return CorpusRow(
        source_kind="command" if source_kind is None else str(source_kind),
        # Copy so the row never aliases the decoded JSON object.
        payload=dict(raw.get("payload") or {}),
        expected_rule_ids=expected_ids,
        label="" if label is None else str(label),
    )
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def corpus_loader() -> Callable[[str], list[CorpusRow]]:
    """Provide a function that loads the labelled rows for a cohort.

    The returned callable takes a cohort name and looks for
    ``corpus/<name>.jsonl`` first (real, gitignored), then
    ``corpus/seed_<name>.jsonl`` (synthetic, committed).  When neither
    file exists it returns an empty list, which the calling tests are
    expected to treat as a reason to skip.
    """

    def _load(name: str) -> list[CorpusRow]:
        corpus_file = _resolve_corpus_path(name)
        if corpus_file is None:
            return []
        return list(map(_row_from_dict, _read_jsonl(corpus_file)))

    return _load
|
||||
|
||||
|
||||
def make_event(row: CorpusRow, source_id: str = "src") -> TaggerEvent:
    """Wrap a :class:`CorpusRow` in a :class:`TaggerEvent`.

    The row supplies ``source_kind`` and ``payload`` (passed through
    as-is, not copied); *source_id* defaults to ``"src"``.  Attacker,
    identity, session and decky identifiers are all ``None``.
    """
    event = TaggerEvent(
        source_kind=row.source_kind,
        source_id=source_id,
        # No attribution is attached to corpus rows.
        attacker_uuid=None,
        identity_uuid=None,
        session_id=None,
        decky_id=None,
        payload=row.payload,
    )
    return event
|
||||
|
||||
|
||||
def precision_for(
    rule_id: str,
    rows: list[CorpusRow],
    fired: dict[str, list[str]],
) -> tuple[float, int, int]:
    """Return ``(precision, tp, fp)`` for *rule_id* over *rows*.

    *fired* maps a row's ``label`` to the rule ids that matched that
    row; labels absent from *fired* count as "nothing matched".  Because
    the lookup is by label, rows sharing a label share one match list.

    A row on which *rule_id* fired is a true positive when the row's
    ``expected_rule_ids`` contains *rule_id*, and a false positive
    otherwise.  Rows on which it did not fire are ignored.

    Precision is ``tp / (tp + fp)``.  When the rule never fired the
    result is the vacuous ``(1.0, 0, 0)``; callers are expected to apply
    their own ``min_matches`` gate before asserting on it.
    """
    tp = fp = 0
    for row in rows:
        if rule_id not in fired.get(row.label, []):
            continue
        if rule_id in row.expected_rule_ids:
            tp += 1
        else:
            fp += 1

    hits = tp + fp
    precision = tp / hits if hits else 1.0
    return precision, tp, fp
|
||||
|
||||
|
||||
# Public surface of this module: the row type, the three fixtures, and
# the two plain helper functions.
__all__ = [
    "CorpusRow",
    "compiled_rules",
    "precision_engine",
    "corpus_loader",
    "make_event",
    "precision_for",
]
|
||||
Reference in New Issue
Block a user