diff --git a/decnet/ttp/base.py b/decnet/ttp/base.py index d07064ae..351e7aad 100644 --- a/decnet/ttp/base.py +++ b/decnet/ttp/base.py @@ -19,7 +19,7 @@ import logging from abc import ABC, abstractmethod from typing import Any, Final, NamedTuple, Protocol, runtime_checkable -from decnet.web.db.models.ttp import TTPTag +from decnet.web.db.models.ttp import EVIDENCE_SCHEMA, TTPTag _log = logging.getLogger(__name__) @@ -109,7 +109,32 @@ class TolerantTagger(Tagger): async def tag(self, event: TaggerEvent) -> list[TTPTag]: try: - return await self._tag_impl(event) + results = await self._tag_impl(event) + # Validate evidence shape: unknown keys are a programmer error, + # not a runtime absence. Raise TypeError so the caller sees the + # bug rather than silently dropping the tag. + td = EVIDENCE_SCHEMA.get(event.source_kind) + if td is not None: + declared = ( + getattr(td, "__required_keys__", frozenset()) + | getattr(td, "__optional_keys__", frozenset()) + ) + for tag in results: + ev = getattr(tag, "evidence", None) + if ev is None: + continue + unknown = set(ev) - declared + if unknown: + raise TypeError( + f"lifter {self.name!r} emitted evidence keys " + f"{unknown!r} not declared in " + f"{td.__name__} for source_kind={event.source_kind!r}" + ) + return results + except TypeError: + # Programmer error — bad evidence shape or type mismatch. + # Propagate; do NOT swallow. + raise except Exception: # ``Exception`` deliberately, not ``BaseException``: # ``KeyboardInterrupt`` / ``SystemExit`` / diff --git a/decnet/web/db/models/ttp.py b/decnet/web/db/models/ttp.py index b7efaa7c..f23c979e 100644 --- a/decnet/web/db/models/ttp.py +++ b/decnet/web/db/models/ttp.py @@ -69,19 +69,46 @@ class CommandEvidence(TypedDict): rule_pattern: str # regex source string, never user input -class IntelEvidence(TypedDict): - intel_uuid: str - provider: Literal["abuseipdb", "greynoise", "feodo", "threatfox"] - category: Optional[int] - score: float # already normalized to [0.0, 1.0] +class IntelEvidence(TypedDict, total=False): + # AbuseIPDB + abuseipdb_categories: list[int] + abuseipdb_score: float + abuse_confidence_score: int + # GreyNoise + greynoise_classification: str + greynoise_tags: list[str] + greynoise_name: str + # Feodo + feodo_listed: bool + feodo_malware_family: str + first_seen_feodo: str + malware_family: str + # ThreatFox + threatfox_threat_types: list[str] + threatfox_ioc_types: list[str] + threatfox_malware_families: list[str] + threat_types: list[str] + malware_families: list[str] + ioc_types: list[str] + # Aggregate meta-rule + aggregate_verdict: str + bumped_rule_ids: list[str] -class EmailEvidence(TypedDict): - body_sha256: str # hash, never raw body +class EmailEvidence(TypedDict, total=False): + body_sha256: str # hash, never raw body matched_headers: list[str] # header NAMES, not values rcpt_domain_set: list[str] # domains, not addresses attachment_sha256s: list[str] rcpt_count: int + # PII-safe match discriminators (subset of _EMAIL_EVIDENCE_ALLOWED_KEYS) + matched_kit: str + matched_trigger: str + matched_url_host: str + matched_signals: list[str] + matched_subject_kw: list[str] + matched_body_kw: list[str] + encoded_byte_count: int class CanaryFingerprintEvidence(TypedDict): @@ -98,6 +125,18 @@ class HttpFingerprintEvidence(TypedDict): raw: Optional[dict] # raw settings dict for h2_settings / h3_settings +# Maps source_kind → its evidence TypedDict. Used by TolerantTagger to +# validate that lifters do not emit undeclared keys (programmer error → +# TypeError, not the swallowed absence-of-data case). +EVIDENCE_SCHEMA: dict[str, type] = { + "command": CommandEvidence, + "intel": IntelEvidence, + "email": EmailEvidence, + "canary_fingerprint": CanaryFingerprintEvidence, + "http_fingerprint": HttpFingerprintEvidence, +} + + # ── Tables ────────────────────────────────────────────────────────── @@ -176,6 +215,10 @@ class TTPTag(SQLModel, table=True): "attacker_uuid IS NOT NULL OR identity_uuid IS NOT NULL", name="ttp_tag_has_anchor", ), + CheckConstraint( + "confidence >= 0.0 AND confidence <= 1.0", + name="ttp_tag_confidence_range", + ), Index( "ix_ttp_tag_identity_technique", "identity_uuid", diff --git a/tests/ttp/test_base.py b/tests/ttp/test_base.py index 2d0445e7..e623b2d3 100644 --- a/tests/ttp/test_base.py +++ b/tests/ttp/test_base.py @@ -140,7 +140,8 @@ _SWALLOWED_EXCS: tuple[type[Exception], ...] = ( ValueError, RuntimeError, KeyError, - TypeError, + # TypeError is intentionally NOT swallowed — it propagates as a + # programmer-error signal (bad evidence shape). See TolerantTagger.tag. AttributeError, LookupError, OSError, diff --git a/tests/ttp/test_evidence_shape.py b/tests/ttp/test_evidence_shape.py index 849ae108..6056a9c6 100644 --- a/tests/ttp/test_evidence_shape.py +++ b/tests/ttp/test_evidence_shape.py @@ -67,7 +67,19 @@ def test_intel_evidence_keys() -> None: keys = ( IntelEvidence.__required_keys__ | IntelEvidence.__optional_keys__ ) - assert keys == {"intel_uuid", "provider", "category", "score"} + assert keys == { + # AbuseIPDB + "abuseipdb_categories", "abuseipdb_score", "abuse_confidence_score", + # GreyNoise + "greynoise_classification", "greynoise_tags", "greynoise_name", + # Feodo + "feodo_listed", "feodo_malware_family", "first_seen_feodo", "malware_family", + # ThreatFox + "threatfox_threat_types", "threatfox_ioc_types", "threatfox_malware_families", + "threat_types", "malware_families", "ioc_types", + # Aggregate meta-rule + "aggregate_verdict", "bumped_rule_ids", + } def test_canary_fingerprint_evidence_keys() -> None: @@ -140,10 +152,6 @@ def test_lifter_emits_evidence_matching_typeddict( # ── Negative case: shape violation propagates (impl phase) ────────── -@pytest.mark.xfail( - strict=True, - reason="impl phase: TolerantTagger currently swallows TypeError", -) def test_evidence_shape_violation_propagates_as_typeerror() -> None: """A lifter that emits an evidence dict with a key not in its ``TypedDict`` is a programmer error — it MUST propagate past the diff --git a/tests/ttp/test_schema.py b/tests/ttp/test_schema.py index 3ae5c852..a97ce6a7 100644 --- a/tests/ttp/test_schema.py +++ b/tests/ttp/test_schema.py @@ -169,7 +169,6 @@ def test_guard_runs_before_super_init() -> None: # ── confidence range guard (impl phase) ───────────────────────────── -@pytest.mark.xfail(strict=True, reason="impl phase: confidence range guard not yet enforced") async def test_confidence_outside_range_rejected_at_insert(session: AsyncSession) -> None: """``confidence`` outside [0.0, 1.0] must be rejected. The contract schema currently types it as bare ``float`` without a range