feat(ttp): E.3.7 RuleEngine — evaluate + atomic-swap watch_store

Implements the rule engine body left empty at the contract phase: evaluate() dispatches by source_kind through self._by_kind, runs the rule's match spec against event.payload, and emits one TTPTag per emits entry. watch_store() loads the initial corpus from RuleStore.load_compiled, then drains subscribe_changes, applying definition changes via single-statement dict assignment (atomic swap, GIL-atomic to readers) and state changes via NamedTuple._replace on the existing CompiledRule.

Why: with the FS + DB stores in place (E.3.5/E.3.6), the engine is the last piece of the rule plane. Lifters (E.3.9–E.3.13) consume the engine; the worker bootstrap (E.3.14) wires watch_store into the asyncio event loop. After this commit, a CompositeTagger constructed with a RuleEngine + a populated rules dir will produce real tags.

Notes:
- CompiledRule.emits extended to a 4-tuple (technique_id, sub_technique_id, tactic, confidence). Tactic + confidence ride per-emit so a single rule can carry multiple precision targets (the "one event maps to many techniques" property). Compile helpers in both backends extract them from the YAML emits dict; a missing tactic or confidence is a deploy-time error.
- v0 match operator is "pattern" (regex). The matched field defaults per source_kind (command_text / raw_url / subject / verdict / …) and is overridable via match.field. Future ops (contains, equals, in_set) extend _match_event without touching the engine surface.
- Confidence model: rules with state="clipped" and confidence_max set cap the per-emit confidence downward; clipped is a soft suppress, not a hard skip. Disabled rules are skipped wholly; an expires_at in the past is re-checked in evaluate() as defense-in-depth (the store auto-reverts, but a racing read between expiry and revert must not fire the rule).
- _span(name, **attrs) helper in the engine + both stores short-circuits on decnet.telemetry._ENABLED — this matches the project's @traced / wrap_repository zero-overhead-when-disabled pattern instead of relying solely on the no-op tracer indirection.
- Late-bound tracer (telemetry.get_tracer called per-span, not at module load) so test_tracing's monkeypatch reaches the production code path.

xfails flipped:
- tests/ttp/test_rule_engine.py: multi-emit fan-out + rule_version-collision-via-engine
- tests/ttp/test_multi_mapping.py: N×M engine fan-out + idempotent replay
- tests/ttp/test_tracing.py: ttp.eval span hierarchy + ttp.rule.fire span attributes

Tests: 214 passed, 19 xfailed (gated on E.3.8 lifters / rule pack / worker bootstrap).
mypy: clean on prod code; pre-existing test-stub arg-type warnings unchanged.
2026-05-01 08:49:15 -04:00
parent 8a93ee3129
commit ed3f340ea8
10 changed files with 679 additions and 150 deletions
--- a/tests/ttp/test_multi_mapping.py
+++ b/tests/ttp/test_multi_mapping.py
@@ -152,34 +152,111 @@ def test_uuid_is_deterministic_replay_safe(
 # ── Engine fan-out (xfail until E.3.7) ──────────────────────────────


-@pytest.mark.xfail(
-    strict=True,
-    reason="impl phase E.3.7 — RuleEngine.evaluate() empty body returns "
-    "[]; engine-level fan-out lands with the engine impl",
-)
 def test_engine_emits_n_times_m_rows() -> None:
    """End-to-end: a synthetic event matched by 3 rules each emitting
    2 techniques produces 6 tag rows from ``RuleEngine.evaluate()``.
-
-    Today the engine returns ``[]`` so this assertion xfails. Flips
-    to GREEN at E.3.7 when the engine's dispatch + match + emit logic
-    lands.
    """
-    pytest.fail("RuleEngine.evaluate() fan-out not yet implemented")
+    import asyncio
+
+    from decnet.ttp.base import TaggerEvent
+    from decnet.ttp.impl.rule_engine import CompiledRule, RuleEngine
+    from decnet.ttp.store.base import RuleState
+
+    class _Stub:
+        async def load_compiled(self):  # pragma: no cover
+            return []
+
+        async def get_state(self, _):  # pragma: no cover
+            return RuleState()
+
+        async def set_state(self, *_a, **_kw):  # pragma: no cover
+            return None
+
+        def subscribe_changes(self):  # pragma: no cover
+            async def _g():
+                if False:
+                    yield None
+            return _g()
+
+    rules = [
+        CompiledRule(
+            rule_id=f"R000{i}",
+            rule_version=1,
+            name=f"r{i}",
+            applies_to=frozenset({"command"}),
+            match_spec={"pattern": "hydra"},
+            emits=(
+                (f"T{1000 + 2 * i}", None, "TA0006", 0.85),
+                (f"T{1001 + 2 * i}", None, "TA0006", 0.80),
+            ),
+            evidence_fields=(),
+            state=RuleState(),
+        )
+        for i in range(3)
+    ]
+    eng = RuleEngine(store=_Stub())
+    eng._by_kind = {"command": rules}
+    event = TaggerEvent(
+        source_kind="command",
+        source_id="src1",
+        attacker_uuid="att1",
+        identity_uuid=None,
+        session_id=None,
+        decky_id=None,
+        payload={"command_text": "hydra -l root ssh://1.2.3.4"},
+    )
+    out = asyncio.run(eng.evaluate(event))
+    assert len(out) == 6


-@pytest.mark.xfail(
-    strict=True,
-    reason="impl phase E.3.7 — re-running evaluate() on the same event "
-    "must produce zero NEW rows (idempotent UUID at engine level)",
-)
 def test_engine_replay_produces_no_new_rows() -> None:
    """Idempotency at the engine level: ``evaluate(e)`` followed by
    ``evaluate(e)`` again yields tag rows with identical UUIDs, so
    the downstream ``insert_tags`` no-ops the second batch.
-
-    Pure ``compute_tag_uuid`` determinism is already covered by
-    :func:`test_uuid_is_deterministic_replay_safe`; this test pins
-    the engine wiring around it.
    """
-    pytest.fail("RuleEngine replay-safety wiring not yet implemented")
+    import asyncio
+
+    from decnet.ttp.base import TaggerEvent
+    from decnet.ttp.impl.rule_engine import CompiledRule, RuleEngine
+    from decnet.ttp.store.base import RuleState
+
+    class _Stub:
+        async def load_compiled(self):  # pragma: no cover
+            return []
+
+        async def get_state(self, _):  # pragma: no cover
+            return RuleState()
+
+        async def set_state(self, *_a, **_kw):  # pragma: no cover
+            return None
+
+        def subscribe_changes(self):  # pragma: no cover
+            async def _g():
+                if False:
+                    yield None
+            return _g()
+
+    rule = CompiledRule(
+        rule_id="R0001",
+        rule_version=1,
+        name="r",
+        applies_to=frozenset({"command"}),
+        match_spec={"pattern": "hydra"},
+        emits=(("T1110", None, "TA0006", 0.85),),
+        evidence_fields=(),
+        state=RuleState(),
+    )
+    eng = RuleEngine(store=_Stub())
+    eng._by_kind = {"command": [rule]}
+    event = TaggerEvent(
+        source_kind="command",
+        source_id="src1",
+        attacker_uuid="att1",
+        identity_uuid=None,
+        session_id=None,
+        decky_id=None,
+        payload={"command_text": "hydra -l root ssh://1.2.3.4"},
+    )
+    out1 = asyncio.run(eng.evaluate(event))
+    out2 = asyncio.run(eng.evaluate(event))
+    assert {t.uuid for t in out1} == {t.uuid for t in out2}