feat(ttp): E.3.7 RuleEngine — evaluate + atomic-swap watch_store

Implements the rule engine body left empty at the contract phase: evaluate() dispatches by source_kind through self._by_kind, runs the rule's match spec against event.payload, and emits one TTPTag per emits entry. watch_store() loads the initial corpus from RuleStore.load_compiled, then drains subscribe_changes, applying definition changes via single-statement dict assignment (atomic swap, GIL-atomic to readers) and state changes via NamedTuple._replace on the existing CompiledRule.

Why: with the FS + DB stores in place (E.3.5/E.3.6), the engine is the last piece of the rule plane. Lifters (E.3.9–E.3.13) consume the engine; the worker bootstrap (E.3.14) wires watch_store into the asyncio event loop. After this commit, a CompositeTagger constructed with a RuleEngine + a populated rules dir will produce real tags.

Notes:
- CompiledRule.emits is extended to a 4-tuple (technique_id, sub_technique_id, tactic, confidence). Tactic + confidence ride per-emit so a single rule can carry multiple precision targets (the "one event maps to many techniques" property). Compile helpers in both backends extract them from the YAML emits dict; a missing tactic or confidence is a deploy-time error.
- The v0 match operator is "pattern" (regex). The matched field defaults per source_kind (command_text / raw_url / subject / verdict / …) and is overridable via match.field. Future ops (contains, equals, in_set) extend _match_event without touching the engine surface.
- Confidence model: rules with state="clipped" and confidence_max set cap the per-emit confidence downward; clipped is a soft suppress, not a hard skip. Disabled rules are skipped entirely; an expires_at in the past is re-checked at evaluate() time as defense-in-depth (the store auto-reverts, but a racing read between expiry and revert must not fire the rule).
- _span(name, **attrs) helper in engine + both stores short-circuits on decnet.telemetry._ENABLED — matches the project's @traced / wrap_repository zero-overhead-when-disabled pattern instead of relying solely on the no-op tracer indirection.
- Late-bound tracer (telemetry.get_tracer called per-span, not at module load) so test_tracing's monkeypatch reaches the production code path.

xfails flipped:
- tests/ttp/test_rule_engine.py: multi-emit fan-out + rule_version-collision-via-engine
- tests/ttp/test_multi_mapping.py: N×M engine fan-out + idempotent replay
- tests/ttp/test_tracing.py: ttp.eval span hierarchy + ttp.rule.fire span attributes

Tests: 214 passed, 19 xfailed (gated on E.3.8 lifters / rule pack / worker bootstrap).
mypy: clean on prod code; pre-existing test-stub arg-type warnings unchanged.
2026-05-01 08:49:15 -04:00
parent 8a93ee3129
commit ed3f340ea8
10 changed files with 679 additions and 150 deletions
--- a/tests/ttp/store/test_conformance.py
+++ b/tests/ttp/store/test_conformance.py
@@ -51,7 +51,9 @@ applies_to: [command]
 match:
  pattern: 'hydra'
 emits:
-  - technique_id: T1110
+  - tactic: TA0006
+    technique_id: T1110
+    confidence: 0.85
 """


@@ -114,7 +116,7 @@ async def test_load_compiled_corpus_identical_across_backends(
        assert isinstance(c, CompiledRule)
        assert c.state == RuleState()
        assert c.applies_to == frozenset({"command"})
-        assert c.emits == (("T1110", None),)
+        assert c.emits == (("T1110", None, "TA0006", 0.85),)


 async def test_set_state_isolates_rules(rule_store: RuleStore) -> None:
--- a/tests/ttp/store/test_database.py
+++ b/tests/ttp/store/test_database.py
@@ -127,7 +127,7 @@ async def test_filesystem_to_db_sync_populates_ttp_rule(
        name="brute force ssh",
        applies_to=frozenset({"command"}),
        match_spec={"pattern": "hydra"},
-        emits=(("T1110", None),),
+        emits=(("T1110", None, "TA0006", 0.85),),
        evidence_fields=("matched_tokens",),
        state=RuleState(),
    )
@@ -151,7 +151,7 @@ async def test_filesystem_to_db_sync_populates_ttp_rule(
    loaded = await db_store.load_compiled()
    assert len(loaded) == 1
    assert loaded[0].rule_id == "R0001"
-    assert loaded[0].emits == (("T1110", None),)
+    assert loaded[0].emits == (("T1110", None, "TA0006", 0.85),)


@pytest.mark.skipif(
@@ -186,7 +186,9 @@ applies_to: [command]
 match:
  pattern: 'whoami'
 emits:
-  - technique_id: T1033
+  - tactic: TA0007
+    technique_id: T1033
+    confidence: 0.85
 """,
                encoding="utf-8",
            )
--- a/tests/ttp/store/test_filesystem.py
+++ b/tests/ttp/store/test_filesystem.py
@@ -48,7 +48,9 @@ applies_to: [command]
 match:
  pattern: 'hydra'
 emits:
-  - technique_id: T1110
+  - tactic: TA0006
+    technique_id: T1110
+    confidence: 0.85
 evidence_fields: [matched_tokens]
 """

@@ -190,7 +192,7 @@ def test_compiled_rule_is_frozen() -> None:
        name="test",
        applies_to=frozenset({"attacker_command"}),
        match_spec={},
-        emits=(("T1110", None),),
+        emits=(("T1110", None, "TA0006", 0.85),),
        evidence_fields=(),
        state=RuleState(),
    )
--- a/tests/ttp/test_multi_mapping.py
+++ b/tests/ttp/test_multi_mapping.py
@@ -152,34 +152,111 @@ def test_uuid_is_deterministic_replay_safe(
 # ── Engine fan-out (xfail until E.3.7) ──────────────────────────────


-@pytest.mark.xfail(
-    strict=True,
-    reason="impl phase E.3.7 — RuleEngine.evaluate() empty body returns "
-    "[]; engine-level fan-out lands with the engine impl",
-)
 def test_engine_emits_n_times_m_rows() -> None:
    """End-to-end: a synthetic event matched by 3 rules each emitting
    2 techniques produces 6 tag rows from ``RuleEngine.evaluate()``.
-
-    Today the engine returns ``[]`` so this assertion xfails. Flips
-    to GREEN at E.3.7 when the engine's dispatch + match + emit logic
-    lands.
    """
-    pytest.fail("RuleEngine.evaluate() fan-out not yet implemented")
+    import asyncio
+
+    from decnet.ttp.base import TaggerEvent
+    from decnet.ttp.impl.rule_engine import CompiledRule, RuleEngine
+    from decnet.ttp.store.base import RuleState
+
+    class _Stub:
+        async def load_compiled(self):  # pragma: no cover
+            return []
+
+        async def get_state(self, _):  # pragma: no cover
+            return RuleState()
+
+        async def set_state(self, *_a, **_kw):  # pragma: no cover
+            return None
+
+        def subscribe_changes(self):  # pragma: no cover
+            async def _g():
+                if False:
+                    yield None
+            return _g()
+
+    rules = [
+        CompiledRule(
+            rule_id=f"R000{i}",
+            rule_version=1,
+            name=f"r{i}",
+            applies_to=frozenset({"command"}),
+            match_spec={"pattern": "hydra"},
+            emits=(
+                (f"T{1000 + 2 * i}", None, "TA0006", 0.85),
+                (f"T{1001 + 2 * i}", None, "TA0006", 0.80),
+            ),
+            evidence_fields=(),
+            state=RuleState(),
+        )
+        for i in range(3)
+    ]
+    eng = RuleEngine(store=_Stub())
+    eng._by_kind = {"command": rules}
+    event = TaggerEvent(
+        source_kind="command",
+        source_id="src1",
+        attacker_uuid="att1",
+        identity_uuid=None,
+        session_id=None,
+        decky_id=None,
+        payload={"command_text": "hydra -l root ssh://1.2.3.4"},
+    )
+    out = asyncio.run(eng.evaluate(event))
+    assert len(out) == 6


-@pytest.mark.xfail(
-    strict=True,
-    reason="impl phase E.3.7 — re-running evaluate() on the same event "
-    "must produce zero NEW rows (idempotent UUID at engine level)",
-)
 def test_engine_replay_produces_no_new_rows() -> None:
    """Idempotency at the engine level: ``evaluate(e)`` followed by
    ``evaluate(e)`` again yields tag rows with identical UUIDs, so
    the downstream ``insert_tags`` no-ops the second batch.
-
-    Pure ``compute_tag_uuid`` determinism is already covered by
-    :func:`test_uuid_is_deterministic_replay_safe`; this test pins
-    the engine wiring around it.
    """
-    pytest.fail("RuleEngine replay-safety wiring not yet implemented")
+    import asyncio
+
+    from decnet.ttp.base import TaggerEvent
+    from decnet.ttp.impl.rule_engine import CompiledRule, RuleEngine
+    from decnet.ttp.store.base import RuleState
+
+    class _Stub:
+        async def load_compiled(self):  # pragma: no cover
+            return []
+
+        async def get_state(self, _):  # pragma: no cover
+            return RuleState()
+
+        async def set_state(self, *_a, **_kw):  # pragma: no cover
+            return None
+
+        def subscribe_changes(self):  # pragma: no cover
+            async def _g():
+                if False:
+                    yield None
+            return _g()
+
+    rule = CompiledRule(
+        rule_id="R0001",
+        rule_version=1,
+        name="r",
+        applies_to=frozenset({"command"}),
+        match_spec={"pattern": "hydra"},
+        emits=(("T1110", None, "TA0006", 0.85),),
+        evidence_fields=(),
+        state=RuleState(),
+    )
+    eng = RuleEngine(store=_Stub())
+    eng._by_kind = {"command": [rule]}
+    event = TaggerEvent(
+        source_kind="command",
+        source_id="src1",
+        attacker_uuid="att1",
+        identity_uuid=None,
+        session_id=None,
+        decky_id=None,
+        payload={"command_text": "hydra -l root ssh://1.2.3.4"},
+    )
+    out1 = asyncio.run(eng.evaluate(event))
+    out2 = asyncio.run(eng.evaluate(event))
+    assert {t.uuid for t in out1} == {t.uuid for t in out2}
--- a/tests/ttp/test_rule_engine.py
+++ b/tests/ttp/test_rule_engine.py
@@ -44,24 +44,52 @@ def _ev() -> TaggerEvent:


 class _StubStore:
-    """Minimal duck-typed RuleStore for contract-phase construction."""
+    """Minimal duck-typed RuleStore for engine construction in tests.
+
+    Provides the subset of the ABC the engine touches at construction
+    time. Tests that drive ``evaluate()`` populate ``eng._by_kind``
+    directly rather than going through ``watch_store()``; the
+    ``load_compiled`` / ``subscribe_changes`` stubs are only here so a
+    test that DOES want to drive the watch loop can opt in.
+    """
+
+    async def load_compiled(self) -> list[CompiledRule]:  # pragma: no cover
+        return []
+
+    async def get_state(self, _rule_id: str):  # pragma: no cover
+        from decnet.ttp.store.base import RuleState
+        return RuleState()
+
+    async def set_state(self, *_a: Any, **_kw: Any) -> None:  # pragma: no cover
+        return None
+
+    def subscribe_changes(self):  # pragma: no cover
+        async def _gen():
+            if False:
+                yield None
+        return _gen()


 def _make_compiled_rule(
    *,
    rule_id: str = "R0001",
    rule_version: int = 1,
-    emits: tuple[tuple[str, str | None], ...] = (("T1110", None),),
+    emits: tuple[tuple[str, str | None, str, float], ...] = (
+        ("T1110", None, "TA0006", 0.85),
+    ),
+    match_spec: dict[str, Any] | None = None,
 ) -> CompiledRule:
+    from decnet.ttp.store.base import RuleState  # noqa: PLC0415
+
    return CompiledRule(
        rule_id=rule_id,
        rule_version=rule_version,
        name="test rule",
        applies_to=frozenset({"command"}),
-        match_spec={"contains": "hydra"},
+        match_spec=match_spec or {"pattern": "hydra"},
        emits=emits,
        evidence_fields=("matched_tokens",),
-        state=object(),  # RuleState lands in E.1.11; opaque here
+        state=RuleState(),
    )


@@ -84,15 +112,17 @@ def test_compiled_rule_is_immutable() -> None:
    # NamedTuple gives us field-level immutability — the atomic-swap
    # property (E.2.14b) requires that a rule in the dispatch index
    # cannot be mutated in place; replacement is the only legal edit.
+    from decnet.ttp.store.base import RuleState  # noqa: PLC0415
+
    cr = CompiledRule(
        rule_id="R0001",
        rule_version=1,
        name="brute",
        applies_to=frozenset({"command"}),
        match_spec={},
-        emits=(("T1110", None),),
+        emits=(("T1110", None, "TA0006", 0.85),),
        evidence_fields=("matched_tokens",),
-        state=object(),
+        state=RuleState(),
    )
    with pytest.raises(AttributeError):
        cr.rule_id = "R9999"  # type: ignore[misc]
@@ -109,15 +139,28 @@ def test_rule_engine_init_signature_takes_store() -> None:
    assert list(sig.parameters)[1] == "store"


-def test_evaluate_returns_empty_list_in_contract_phase() -> None:
-    eng = RuleEngine(store=_StubStore()) 
+def test_evaluate_returns_empty_list_for_unknown_source_kind() -> None:
+    eng = RuleEngine(store=_StubStore())
    out = asyncio.run(eng.evaluate(_ev()))
    assert out == []


-def test_watch_store_returns_none_and_does_not_raise() -> None:
-    eng = RuleEngine(store=_StubStore()) 
-    assert asyncio.run(eng.watch_store()) is None
+def test_watch_store_drains_and_can_be_cancelled() -> None:
+    """``watch_store()`` blocks on ``subscribe_changes`` after loading
+    the empty corpus. Test that it can be cancelled cleanly — the
+    worker bootstrap (E.3.14) cancels it during shutdown."""
+    eng = RuleEngine(store=_StubStore())
+
+    async def _drive() -> None:
+        task = asyncio.create_task(eng.watch_store())
+        await asyncio.sleep(0.05)
+        task.cancel()
+        try:
+            await task
+        except asyncio.CancelledError:
+            pass
+
+    asyncio.run(_drive())


 def test_rule_schema_has_documented_fields() -> None:
@@ -208,21 +251,22 @@ def test_e25_evaluate_unknown_source_kind_returns_empty() -> None:
    assert asyncio.run(eng.evaluate(weird)) == []


-@pytest.mark.xfail(
-    strict=True,
-    reason="impl phase E.3.5: evaluate() does not yet fan out emits",
-)
 def test_e25_one_rule_multiple_emits_produces_multiple_tags() -> None:
    """One matching rule with N entries in ``emits`` must produce N
    tag rows from a single event. The "one event maps to many
    techniques" property enforced at engine level."""
-    eng = RuleEngine(store=_StubStore()) 
+    eng = RuleEngine(store=_StubStore())
    rule = _make_compiled_rule(
        rule_id="R_MULTI",
-        emits=(("T1110", None), ("T1078", None), ("T1059", "001")),
+        emits=(
+            ("T1110", None, "TA0006", 0.85),
+            ("T1078", None, "TA0001", 0.80),
+            ("T1059", "001", "TA0002", 0.90),
+        ),
    )
    eng._by_kind = {"command": [rule]}
-    out = asyncio.run(eng.evaluate(_ev()))
+    event = _ev()._replace(payload={"command_text": "hydra -l root ssh://1.2.3.4"})
+    out = asyncio.run(eng.evaluate(event))
    assert len(out) == 3
    techs = {(t.technique_id, t.sub_technique_id) for t in out}
    assert techs == {("T1110", None), ("T1078", None), ("T1059", "001")}
@@ -253,19 +297,16 @@ def test_e25_rule_version_collision_yields_distinct_tag_uuids() -> None:
    assert u_v1 != u_v2


-@pytest.mark.xfail(
-    strict=True,
-    reason="impl phase E.3.5: evaluate() does not yet emit tags",
-)
 def test_e25_rule_version_collision_via_engine_yields_distinct_tag_uuids() -> None:
    """Same property as above, but driven through the engine: two
    CompiledRule instances differing only in rule_version produce two
    rows whose ``uuid`` columns differ."""
-    eng = RuleEngine(store=_StubStore()) 
+    eng = RuleEngine(store=_StubStore())
    r1 = _make_compiled_rule(rule_id="R_VER", rule_version=1)
    r2 = _make_compiled_rule(rule_id="R_VER", rule_version=2)
    eng._by_kind = {"command": [r1, r2]}
-    out = asyncio.run(eng.evaluate(_ev()))
+    event = _ev()._replace(payload={"command_text": "hydra -l root ssh://1.2.3.4"})
+    out = asyncio.run(eng.evaluate(event))
    assert len(out) == 2
    uuids = {t.uuid for t in out}
    assert len(uuids) == 2
--- a/tests/ttp/test_tracing.py
+++ b/tests/ttp/test_tracing.py
@@ -121,15 +121,58 @@ def span_exporter(
 # ── Eval span hierarchy (xfail until E.3.7) ─────────────────────────


-@pytest.mark.xfail(
-    strict=True,
-    reason="impl phase E.3.7 — RuleEngine.evaluate() emits no spans "
-    "today; ttp.eval span lands with the engine impl",
-)
-def test_eval_emits_top_level_span(span_exporter: tuple[InMemorySpanExporter, TracerProvider]) -> None:
+def test_eval_emits_top_level_span(
+    span_exporter: tuple[InMemorySpanExporter, TracerProvider],
+) -> None:
    """``evaluate()`` produces a ``ttp.eval`` span with
    ``attacker_uuid`` and ``identity_uuid`` attributes."""
-    pytest.fail("ttp.eval span not yet emitted")
+    import asyncio
+
+    from decnet.ttp.base import TaggerEvent
+    from decnet.ttp.impl.rule_engine import CompiledRule, RuleEngine
+    from decnet.ttp.store.base import RuleState
+
+    class _Stub:
+        async def load_compiled(self):  # pragma: no cover
+            return []
+
+        async def get_state(self, _):  # pragma: no cover
+            return RuleState()
+
+        async def set_state(self, *_a, **_kw):  # pragma: no cover
+            return None
+
+        def subscribe_changes(self):  # pragma: no cover
+            async def _g():
+                if False:
+                    yield None
+            return _g()
+
+    exporter, _ = span_exporter
+    rule = CompiledRule(
+        rule_id="R0001",
+        rule_version=1,
+        name="r",
+        applies_to=frozenset({"command"}),
+        match_spec={"pattern": "hydra"},
+        emits=(("T1110", None, "TA0006", 0.85),),
+        evidence_fields=(),
+        state=RuleState(),
+    )
+    eng = RuleEngine(store=_Stub())
+    eng._by_kind = {"command": [rule]}
+    event = TaggerEvent(
+        source_kind="command", source_id="src1",
+        attacker_uuid="ATT_X", identity_uuid="IDY_Y",
+        session_id=None, decky_id=None,
+        payload={"command_text": "hydra"},
+    )
+    asyncio.run(eng.evaluate(event))
+    eval_spans = [s for s in exporter.get_finished_spans() if s.name == "ttp.eval"]
+    assert eval_spans
+    attrs = dict(eval_spans[0].attributes or {})
+    assert attrs.get("attacker_uuid") == "ATT_X"
+    assert attrs.get("identity_uuid") == "IDY_Y"


@pytest.mark.xfail(
@@ -143,17 +186,59 @@ def test_lifter_child_spans_emitted(span_exporter: tuple[InMemorySpanExporter, T
    pytest.fail("per-lifter spans not yet emitted")


-@pytest.mark.xfail(
-    strict=True,
-    reason="impl phase E.3.7 — ttp.rule.fire spans with rule_id + "
-    "technique_id land with the engine impl",
-)
 def test_rule_fire_spans_carry_rule_and_technique_attrs(
    span_exporter: tuple[InMemorySpanExporter, TracerProvider],
 ) -> None:
    """Each matched rule produces a ``ttp.rule.fire`` span with
    ``rule_id`` and ``technique_id`` attributes set."""
-    pytest.fail("ttp.rule.fire spans not yet emitted")
+    import asyncio
+
+    from decnet.ttp.base import TaggerEvent
+    from decnet.ttp.impl.rule_engine import CompiledRule, RuleEngine
+    from decnet.ttp.store.base import RuleState
+
+    class _Stub:
+        async def load_compiled(self):  # pragma: no cover
+            return []
+
+        async def get_state(self, _):  # pragma: no cover
+            return RuleState()
+
+        async def set_state(self, *_a, **_kw):  # pragma: no cover
+            return None
+
+        def subscribe_changes(self):  # pragma: no cover
+            async def _g():
+                if False:
+                    yield None
+            return _g()
+
+    exporter, _ = span_exporter
+    rule = CompiledRule(
+        rule_id="R_FIRE",
+        rule_version=1,
+        name="r",
+        applies_to=frozenset({"command"}),
+        match_spec={"pattern": "hydra"},
+        emits=(("T1110", None, "TA0006", 0.85),),
+        evidence_fields=(),
+        state=RuleState(),
+    )
+    eng = RuleEngine(store=_Stub())
+    eng._by_kind = {"command": [rule]}
+    asyncio.run(eng.evaluate(TaggerEvent(
+        source_kind="command", source_id="s",
+        attacker_uuid="a", identity_uuid=None,
+        session_id=None, decky_id=None,
+        payload={"command_text": "hydra"},
+    )))
+    fire_spans = [
+        s for s in exporter.get_finished_spans() if s.name == "ttp.rule.fire"
+    ]
+    assert fire_spans
+    attrs = dict(fire_spans[0].attributes or {})
+    assert attrs.get("rule_id") == "R_FIRE"
+    assert attrs.get("technique_id") == "T1110"


 # ── set_state span hierarchy (xfail until E.3.5/E.3.6) ──────────────