Files
DECNET/tests/ttp/test_confidence.py
anti 471b33df1b feat(ttp): enable test_abuseipdb_score_30_dropped — impl was already done
Replace pytest.fail() stub with actual test body: constructs IntelLifter
with R0054, feeds score=30 payload, asserts confidence=0.21 (0.70×0.30)
which is below CONFIDENCE_FLOOR. xfail marker removed.

Corrects docstring: R0054 T1110 base_conf=0.70, not 0.85 as originally written.
2026-05-10 08:08:29 -04:00

151 lines
5.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""E.2.10 — Confidence model tests.
Pins the confidence calculus from ``development/TTP_TAGGING.md``
§"Confidence model":
* The worker may adjust a rule's base confidence DOWNWARD only.
``confidence × multiplier`` (for ``multiplier ∈ [0, 1]``) never
exceeds the rule's base. Property-tested via Hypothesis.
* A computed confidence below ``0.3`` is dropped at write time —
``insert_tags()`` receives the row but writes nothing and the
drop is reflected in the returned count.
* Worked example: AbuseIPDB ``score=30`` against R0054 (T1110,
base 0.70) → ``0.70 × 0.30 = 0.21`` → dropped, no row written.
Pure-arithmetic assertions depend only on helpers in this module.
Behavior beyond pure math (`insert_tags` drop semantics,
`intel_lifter` provider-score multiplier wiring) is asserted against
the real implementation; the earlier ``xfail(strict=True)`` markers
have been removed.
"""
from __future__ import annotations
import pytest
from hypothesis import given, strategies as st
# Minimum confidence a tag needs to be kept; anything below this is the
# "dropped at write time" case the tests in this module pin.
CONFIDENCE_FLOOR: float = 0.3
def _adjust(base: float, multiplier: float) -> float:
"""Reference implementation of the downward-only adjustment.
The real worker (lands at E.3.7 / E.3.10) computes the same
quantity and either writes or drops the resulting tag based on
``CONFIDENCE_FLOOR``. Pinning the formula here as a separate
callable lets the property test run today without depending on
not-yet-implemented worker code; the impl phase asserts
equivalence by replaying a fixture corpus through the worker
and comparing against this helper.
"""
if not 0.0 <= multiplier <= 1.0:
raise ValueError(f"multiplier {multiplier!r} outside [0, 1]")
return base * multiplier
# ── Pure-math properties (GREEN today) ──────────────────────────────
@given(
    base=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
    multiplier=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
)
def test_adjustment_is_downward_only(base: float, multiplier: float) -> None:
    """Property: the adjusted confidence never exceeds the rule's base.

    For any ``base`` and ``multiplier`` drawn from ``[0, 1]``,
    ``_adjust(base, multiplier)`` must stay at or below ``base``.
    Multipliers model honeypot context (decky realism), repetition and
    identity coherence, so none of them may ever act as a boost; a
    contributor introducing a multiplier > 1 would trip this test.
    """
    fp_slack = 1e-12  # tolerance for floating-point rounding
    ceiling = base + fp_slack
    assert _adjust(base, multiplier) <= ceiling
@pytest.mark.parametrize(
    "base,multiplier,expected",
    [
        (0.85, 0.30, 0.255),  # originally documented example (0.85 base)
        (0.70, 0.30, 0.21),  # R0054 T1110 base_conf=0.70, AbuseIPDB score=30
        (1.0, 1.0, 1.0),
        (0.6, 0.5, 0.3),  # Right at the floor
        (0.5, 0.5, 0.25),  # Below the floor
    ],
)
def test_known_inputs_match_worked_example(
    base: float, multiplier: float, expected: float,
) -> None:
    """Pin ``_adjust`` against hand-computed products.

    Covers the AbuseIPDB ``score=30`` worked example with both the
    originally documented base (0.85) and R0054's actual base (0.70),
    plus the identity case and values at and just below the 0.3 floor.
    ``pytest.approx`` absorbs floating-point rounding in the products.
    """
    assert _adjust(base, multiplier) == pytest.approx(expected)
def test_floor_constant_pins_doc_value() -> None:
    """Guard the documented floor of 0.3.

    Relaxing ``CONFIDENCE_FLOOR`` must be a deliberate change that
    touches the doc and this test together, never a quiet edit.
    """
    documented_floor = 0.3
    assert CONFIDENCE_FLOOR == documented_floor
def test_invalid_multiplier_raises() -> None:
    """``_adjust`` rejects multipliers on either side of ``[0, 1]``."""
    # One value above the interval, one below; each must raise on its own.
    for out_of_range in (1.5, -0.1):
        with pytest.raises(ValueError):
            _adjust(0.85, out_of_range)
# ── Drop-below-0.3 + provider multiplier ─────────────────────────────
def test_below_floor_dropped_at_insert() -> None:
    """The repository layer's floor matches the one pinned here.

    ``insert_tags`` is documented to write a row only when
    ``confidence ≥ 0.3``, silently dropping the rest and returning
    ``len(rows_in) - drops``. This test checks the two pure-Python
    facts that contract rests on: the mixin's ``_CONFIDENCE_FLOOR``
    equals ``CONFIDENCE_FLOOR``, and a known below-floor product
    really compares below it.
    """
    from decnet.web.db.sqlmodel_repo.ttp import _CONFIDENCE_FLOOR as repo_floor

    assert repo_floor == CONFIDENCE_FLOOR

    # The database-backed assertion is in ``tests/web/db/test_ttp_repo.py``
    # (E.2.13), where the ``db_backends`` fixture is available. Here we
    # only confirm that the sample confidences fall under the floor.
    below_floor_samples = [_adjust(0.85, 0.30) for _ in range(5)]
    for confidence in below_floor_samples:
        assert confidence < CONFIDENCE_FLOOR
def test_abuseipdb_score_30_dropped() -> None:
    """Worked example through the lifter: AbuseIPDB score=30 → 0.21.

    R0054 (T1110) carries base_conf=0.70 and the provider multiplier is
    30/100 = 0.30, so every emitted tag should have confidence
    0.70 × 0.30 = 0.21. The test asserts that the lifter does emit tags
    and that each one sits below ``CONFIDENCE_FLOOR``, i.e. in the
    range ``insert_tags`` is documented to drop.
    """
    import asyncio
    from pathlib import Path

    from decnet.ttp.base import TaggerEvent
    from decnet.ttp.impl.intel_lifter import IntelLifter
    from decnet.ttp.store.base import RuleState
    from decnet.ttp.store.impl.filesystem import _parse_and_compile
    from tests.ttp._stub_store import StubRuleStore

    # Rule files are resolved relative to this test file:
    # two directories up, then ``rules/ttp``.
    rule_path = (
        Path(__file__).resolve().parents[2] / "rules" / "ttp" / "R0054.yaml"
    )
    compiled_rule = _parse_and_compile(rule_path, RuleState())

    lifter = IntelLifter(StubRuleStore(compiled=[compiled_rule]))
    lifter._index.install(compiled_rule)

    event = TaggerEvent(
        source_kind="intel",
        source_id="src-confidence-test",
        attacker_uuid="att1",
        identity_uuid=None,
        session_id=None,
        decky_id=None,
        payload={"abuseipdb_score": 30, "abuseipdb_categories": [18, 22]},
    )

    emitted = asyncio.run(lifter.tag(event))
    assert emitted, "intel lifter emitted no tags — multiplier not applied"

    expected_confidence = pytest.approx(0.21, rel=1e-4)
    for tag in emitted:
        assert tag.confidence == expected_confidence, (
            f"expected 0.70×0.30=0.21, got {tag.confidence!r}"
        )
        assert tag.confidence < CONFIDENCE_FLOOR