feat(ttp): E.3.5 FilesystemRuleStore — inotify hot-reload + per-rule events

Implements the filesystem-backed rule store body left empty at contract
phase: YAML parse + Pydantic validation, asyncinotify watch over
./rules/ttp/, in-process state cache with auto-revert on expires_at,
and a subscribe_changes() async iterator yielding one RuleChange per
individual rule edit. Bus topic builders ttp_rule_reloaded / ttp_rule_state
ship alongside.

Why: the rule plane needed a store before the engine (E.3.7) could
consume RuleChange events and atomically swap compiled rules into its
dispatch index.

Notes:
- Linux-only by construction (asyncinotify wheel gated by sys_platform
  marker; FilesystemRuleStore.__init__ raises on non-Linux).
- Filename allowlist is the FIRST check on every inotify event.
- Content-hash dedup so a single write firing IN_CREATE + IN_CLOSE_WRITE
  produces exactly one RuleChange.
- All compile work serializes on a single asyncio.Lock.
- Subscribers register their queue eagerly so events fired between
  subscribe_changes() and the first __anext__() are buffered.

xfails flipped: per-save-style + filter-ordering + atomic-swap in
test_filesystem.py; load_compiled / set_state isolation / round-trip /
per-rule fan-out / expired-state revert / set_state failure semantics
in test_conformance.py (FS side; DB side stays xfail until E.3.6);
malformed-YAML compile-time check in test_rule_engine.py.

Tests: 197 passed, 35 xfailed (gated on E.3.6 / E.3.7 / lifters).
mypy + bandit: clean on all touched files.

Wiki update for the per-rule reload + state-change topics lands in a
matching wiki-checkout/Service-Bus.md edit (separate repo).
This commit is contained in:
2026-05-01 08:31:05 -04:00
parent 89ce893792
commit f41995a229
8 changed files with 768 additions and 152 deletions

View File

@@ -30,13 +30,41 @@ atomic-swap concurrency) live in :mod:`test_filesystem`.
"""
from __future__ import annotations
import asyncio
import inspect
from datetime import datetime, timedelta, timezone
from pathlib import Path
import pytest
from decnet.ttp.impl.rule_engine import CompiledRule
from decnet.ttp.store.base import RuleChange, RuleState, RuleStore
# Minimal rule document template. ``{rule_id}`` is filled in with
# ``str.format`` so each file written by a test carries its own id.
# NOTE(review): the keys under ``match:`` / ``emits:`` show no nesting
# indentation in this view — confirm against the checked-in file.
_RULE_YAML = """\
rule_id: {rule_id}
rule_version: 1
name: test rule
applies_to: [command]
match:
pattern: 'hydra'
emits:
- technique_id: T1110
"""
def _xfail_db_until_e36(rule_store: RuleStore) -> None:
    """Mark a parametrized run as xfail for the database backend.

    The conformance contract is identical across backends, but the
    DB backend's persistence path lands at E.3.6. This is a per-test
    ``pytest.xfail`` call rather than a module-level skip so the
    FS-backend run still exercises the assertions today.

    Returns ``None`` without side effects for any store whose class is
    not named ``DatabaseRuleStore``.
    """
    # The backend is identified by its class name, not by isinstance.
    if type(rule_store).__name__ == "DatabaseRuleStore":
        pytest.xfail("impl phase E.3.6 — DatabaseRuleStore not implemented")
# ── Surface (GREEN today) ───────────────────────────────────────────
@@ -87,52 +115,60 @@ async def test_get_state_unknown_returns_default(rule_store: RuleStore) -> None:
# ── Behavioral conformance (xfail until E.3.5/E.3.6) ────────────────
@pytest.mark.xfail(
strict=True,
reason="impl phase E.3.5/E.3.6 — load_compiled lands with each "
"backend's parse-and-compile implementation",
)
async def test_load_compiled_corpus_identical_across_backends(
    rule_store: RuleStore, tmp_path: Path,
) -> None:
    """Both backends, given the same YAML corpus, return the same
    set of ``CompiledRule`` (modulo state defaulting). The doc's
    cross-backend property requires running the same fixture against
    both — pinned here as a single test that the parametrize fans
    out over both backends."""
    _xfail_db_until_e36(rule_store)
    # The corpus is written into the directory the store itself exposes as
    # ``_rules_dir`` (a private attribute, hence the type-ignore).
    rules_dir: Path = rule_store._rules_dir  # type: ignore[attr-defined]
    # Two rule files rendered from the same template; only rule_id differs.
    for rule_id in ("R0001", "R0002"):
        (rules_dir / f"{rule_id}.yaml").write_text(
            _RULE_YAML.format(rule_id=rule_id), encoding="utf-8",
        )
    compiled = await rule_store.load_compiled()
    assert {c.rule_id for c in compiled} == {"R0001", "R0002"}
    for c in compiled:
        assert isinstance(c, CompiledRule)
        # No set_state call was made, so every rule carries the default state.
        assert c.state == RuleState()
        assert c.applies_to == frozenset({"command"})
        assert c.emits == (("T1110", None),)
@pytest.mark.xfail(
strict=True,
reason="impl phase E.3.5/E.3.6 — set_state lands with each "
"backend's persistence implementation",
)
async def test_set_state_isolates_rules(rule_store: RuleStore) -> None:
    """``set_state(A, ...)`` does not perturb the state read by
    ``get_state(B)``."""
    _xfail_db_until_e36(rule_store)
    # Change rule A only ...
    await rule_store.set_state(
        "R0001", RuleState(state="disabled", reason="A"), set_by="op",
    )
    # ... then read rule B, which must still compare equal to the default.
    other = await rule_store.get_state("R0002")
    assert other == RuleState()  # B untouched
@pytest.mark.xfail(
strict=True,
reason="impl phase E.3.5/E.3.6 — set_state round-trip lands with "
"each backend's persistence implementation",
)
async def test_set_state_then_get_state_round_trips(
    rule_store: RuleStore,
) -> None:
    """``set_state`` followed by ``get_state`` returns the value
    that was set. No translation, no field drop."""
    _xfail_db_until_e36(rule_store)
    new_state = RuleState(
        state="clipped", confidence_max=0.5, reason="probation",
    )
    await rule_store.set_state("R0001", new_state, set_by="op")
    got = await rule_store.get_state("R0001")
    # Fields supplied by the caller come back unchanged.
    assert got.state == "clipped"
    assert got.confidence_max == 0.5
    assert got.reason == "probation"
    # ``set_by`` is passed as an argument to set_state, not on the RuleState,
    # and ``set_at`` is never supplied here; both are checked on the read side.
    assert got.set_by == "op"
    assert got.set_at is not None
@pytest.mark.xfail(
strict=True,
reason="impl phase E.3.5/E.3.6 — subscribe_changes incremental "
"fan-out lands with each backend's watch implementation",
)
async def test_subscribe_changes_per_rule_not_batched(
rule_store: RuleStore,
) -> None:
@@ -141,33 +177,64 @@ async def test_subscribe_changes_per_rule_not_batched(
entries. The bus per-rule fan-out
(``ttp.rule.reloaded.{rule_id}``) inherits its granularity from
this iterator."""
pytest.fail("subscribe_changes not yet implemented")
_xfail_db_until_e36(rule_store)
sub = rule_store.subscribe_changes()
for i in range(5):
await rule_store.set_state(
f"R000{i}", RuleState(state="disabled"), set_by="op",
)
seen: list[RuleChange] = []
for _ in range(5):
seen.append(await asyncio.wait_for(sub.__anext__(), timeout=2.0))
rule_ids = {ch.rule_id for ch in seen}
assert rule_ids == {f"R000{i}" for i in range(5)}
for ch in seen:
assert ch.change_kind == "state"
assert isinstance(ch.new_value, RuleState)
@pytest.mark.xfail(
strict=True,
reason="impl phase E.3.5/E.3.6 — expires_at auto-revert + "
"ttp.rule.state.{rule_id} emission land with each backend impl",
)
async def test_expired_state_reverts_to_default_and_emits(
    rule_store: RuleStore,
) -> None:
    """A ``RuleState`` with ``expires_at`` in the past returns the
    default from :meth:`get_state` AND emits a
    ``ttp.rule.state.{rule_id}`` auto-revert event."""
    _xfail_db_until_e36(rule_store)
    # Timezone-aware timestamp five seconds in the past.
    past = datetime.now(tz=timezone.utc) - timedelta(seconds=5)
    # Subscribe before mutating so both events below reach this iterator.
    sub = rule_store.subscribe_changes()
    await rule_store.set_state(
        "R0001",
        RuleState(state="disabled", expires_at=past),
        set_by="op",
    )
    # Drain the set_state event we just produced.
    await asyncio.wait_for(sub.__anext__(), timeout=2.0)
    state = await rule_store.get_state("R0001")
    assert state == RuleState()
    # The next event on the stream is expected to be the revert itself; the
    # timeout turns a missing event into a failure instead of a hang.
    revert = await asyncio.wait_for(sub.__anext__(), timeout=2.0)
    assert revert.change_kind == "state"
    assert revert.rule_id == "R0001"
    assert revert.new_value == RuleState()
@pytest.mark.xfail(
strict=True,
reason="impl phase E.3.5/E.3.6 — set_state failure semantics "
"(raise, never silently drop) land with each backend impl",
)
async def test_set_state_failure_raises_not_silent(
    rule_store: RuleStore,
) -> None:
    """A backend failure during :meth:`set_state` (e.g. queue
    death) MUST raise rather than silently drop. Operational state
    changes are NOT a tolerated-absence path — state drift would be
    silent and dangerous."""
    _xfail_db_until_e36(rule_store)

    class _BoomQueue:
        """Stand-in subscriber queue whose ``put`` always raises."""

        async def put(self, _item: object) -> None:
            raise RuntimeError("simulated backend failure")

    # Inject a poisoned subscriber so the publish path raises.
    if not hasattr(rule_store, "_subscribers"):  # pragma: no cover
        pytest.skip("backend has no subscriber fan-out hook")
    rule_store._subscribers.append(_BoomQueue())
    # The error raised by the poisoned queue must reach the set_state caller.
    with pytest.raises(RuntimeError, match="simulated backend failure"):
        await rule_store.set_state(
            "R0001", RuleState(state="disabled"), set_by="op",
        )