Replace pytest.fail() stub with actual test body: constructs IntelLifter with R0054, feeds score=30 payload, asserts confidence=0.21 (0.70×0.30) which is below CONFIDENCE_FLOOR. xfail marker removed. Corrects docstring: R0054 T1110 base_conf=0.70, not 0.85 as originally written.
151 lines
5.6 KiB
Python
151 lines
5.6 KiB
Python
"""E.2.10 — Confidence model tests.
|
||
|
||
Pins the confidence calculus from ``development/TTP_TAGGING.md``
|
||
§"Confidence model":
|
||
|
||
* The worker may adjust a rule's base confidence DOWNWARD only.
|
||
``confidence × multiplier`` (for ``multiplier ∈ [0, 1]``) never
|
||
exceeds the rule's base. Property-tested via Hypothesis.
|
||
* A computed confidence below ``0.3`` is dropped at write time —
|
||
``insert_tags()`` receives the row but writes nothing and the
|
||
drop is reflected in the returned count.
|
||
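* Worked example: AbuseIPDB ``score=30`` → ``0.85 × 0.30 = 0.255`` →
  dropped, no row written. (With rule R0054's base of ``0.70`` the
  same score yields ``0.70 × 0.30 = 0.21``, also below the floor.)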
|
||
|
||
Pure-arithmetic assertions are GREEN today. Behavior beyond pure
math (``insert_tags`` drop semantics, ``intel_lifter`` provider-score
multiplier wiring) was held behind ``xfail(strict=True)`` until the
matching E.3 implementation step landed; those tests now run
unmarked against the real code.
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import pytest
|
||
from hypothesis import given, strategies as st
|
||
|
||
CONFIDENCE_FLOOR: float = 0.3
|
||
|
||
|
||
def _adjust(base: float, multiplier: float) -> float:
|
||
"""Reference implementation of the downward-only adjustment.
|
||
|
||
The real worker (lands at E.3.7 / E.3.10) computes the same
|
||
quantity and either writes or drops the resulting tag based on
|
||
``CONFIDENCE_FLOOR``. Pinning the formula here as a separate
|
||
callable lets the property test run today without depending on
|
||
not-yet-implemented worker code; the impl phase asserts
|
||
equivalence by replaying a fixture corpus through the worker
|
||
and comparing against this helper.
|
||
"""
|
||
if not 0.0 <= multiplier <= 1.0:
|
||
raise ValueError(f"multiplier {multiplier!r} outside [0, 1]")
|
||
return base * multiplier
|
||
|
||
|
||
# ── Pure-math properties (GREEN today) ──────────────────────────────
|
||
|
||
|
||
@given(
    base=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
    multiplier=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
)
def test_adjustment_is_downward_only(base: float, multiplier: float) -> None:
    """Property: the adjusted confidence never exceeds the rule's base.

    Hypothesis draws both ``base`` and ``multiplier`` from ``[0, 1]``
    (NaN excluded). A tag must not end up more confident than the rule
    that emitted it; the multipliers (honeypot context / decky realism,
    repetition, identity coherence) are all meant to stay in ``[0, 1]``.
    This guards against a later change that "boosts" confidence with a
    multiplier above 1.
    """
    fp_slack = 1e-12  # tolerance for floating-point rounding
    ceiling = base + fp_slack
    assert _adjust(base, multiplier) <= ceiling
|
||
|
||
|
||
@pytest.mark.parametrize(
    ("base", "multiplier", "expected"),
    [
        # AbuseIPDB score=30 worked example
        (0.85, 0.30, 0.255),
        (1.0, 1.0, 1.0),
        # Right at the floor
        (0.6, 0.5, 0.3),
        # Below the floor
        (0.5, 0.5, 0.25),
    ],
)
def test_known_inputs_match_worked_example(
    base: float,
    multiplier: float,
    expected: float,
) -> None:
    """Check ``_adjust`` against hand-computed products, within float tolerance."""
    computed = _adjust(base, multiplier)
    assert computed == pytest.approx(expected)
|
||
|
||
|
||
def test_floor_constant_pins_doc_value() -> None:
    """Lock ``CONFIDENCE_FLOOR`` at the documented 0.3.

    Relaxing the floor must be a deliberate change that touches the
    doc and this test together, never a silent edit of the constant.
    """
    documented_floor = 0.3
    assert CONFIDENCE_FLOOR == documented_floor
|
||
|
||
|
||
def test_invalid_multiplier_raises() -> None:
    """``_adjust`` rejects multipliers on either side of ``[0, 1]``."""
    # One value above the interval, then one below it.
    for out_of_range in (1.5, -0.1):
        with pytest.raises(ValueError):
            _adjust(0.85, out_of_range)
|
||
|
||
|
||
# ── Drop-below-0.3 + provider multiplier (formerly xfail; now live) ──
|
||
|
||
|
||
def test_below_floor_dropped_at_insert() -> None:
    """Pin the repository-layer floor and the below-floor worked value.

    The contract being guarded: ``insert_tags`` writes a row only when
    ``confidence ≥ 0.3``; below-floor rows are silently dropped and the
    returned int reflects the drop (``len(rows_in) - drops``).

    This test does not call ``insert_tags``. It checks two things:

    * the mixin layer's ``_CONFIDENCE_FLOOR`` equals this module's
      ``CONFIDENCE_FLOOR``;
    * the ``0.85 × 0.30`` worked-example value is strictly below it.
    """
    from decnet.web.db.sqlmodel_repo.ttp import _CONFIDENCE_FLOOR as repo_floor

    assert repo_floor == CONFIDENCE_FLOOR

    # The end-to-end I/O assertion lives in
    # ``tests/web/db/test_ttp_repo.py`` (E.2.13), where the
    # ``db_backends`` fixture is wired up. Here we only pin the floor
    # constant and the filter semantics: a value below 0.3 must never
    # pass the floor.
    for _ in range(5):
        candidate = _adjust(0.85, 0.30)
        assert candidate < CONFIDENCE_FLOOR
|
||
|
||
|
||
def test_abuseipdb_score_30_dropped() -> None:
    """Worked example: AbuseIPDB ``score=30`` lifts to a below-floor tag.

    Loads rule R0054 (T1110, base_conf=0.70) from the ``rules/ttp``
    directory, sends an intel event carrying ``abuseipdb_score=30``
    through ``IntelLifter``, and checks that every emitted tag has
    confidence ``0.70 × 0.30 = 0.21``, which is under
    ``CONFIDENCE_FLOOR``.

    Only the lifter's output is asserted. The write-time drop is the
    responsibility of ``insert_tags`` and is not invoked here.
    """
    import asyncio
    from pathlib import Path
    from decnet.ttp.base import TaggerEvent
    from decnet.ttp.impl.intel_lifter import IntelLifter
    from decnet.ttp.store.base import RuleState
    from decnet.ttp.store.impl.filesystem import _parse_and_compile
    from tests.ttp._stub_store import StubRuleStore

    # Two directory levels above this file's folder, then rules/ttp.
    base_dir = Path(__file__).resolve().parents[2]
    rule_file = base_dir / "rules" / "ttp" / "R0054.yaml"
    rule = _parse_and_compile(rule_file, RuleState())

    store = StubRuleStore(compiled=[rule])
    lifter = IntelLifter(store)
    # The rule is also installed into the lifter's index explicitly.
    lifter._index.install(rule)

    intel_payload = {"abuseipdb_score": 30, "abuseipdb_categories": [18, 22]}
    event = TaggerEvent(
        source_kind="intel",
        source_id="src-confidence-test",
        attacker_uuid="att1",
        identity_uuid=None,
        session_id=None,
        decky_id=None,
        payload=intel_payload,
    )

    emitted = asyncio.run(lifter.tag(event))
    assert emitted, "intel lifter emitted no tags — multiplier not applied"

    expected_confidence = pytest.approx(0.21, rel=1e-4)
    for tag in emitted:
        assert tag.confidence == expected_confidence, (
            f"expected 0.70×0.30=0.21, got {tag.confidence!r}"
        )
        assert tag.confidence < CONFIDENCE_FLOOR
|