feat(ttp): implement evidence-shape validation and confidence range constraint
- TolerantTagger.tag validates evidence keys against EVIDENCE_SCHEMA TypedDicts; TypeError (programmer error) propagates instead of being swallowed - IntelEvidence and EmailEvidence expanded from stubs to full per-provider key sets (total=False); IntelEvidence old stub fields replaced wholesale - EVIDENCE_SCHEMA map added to models/ttp.py and imported by base.py - TTPTag __table_args__ gains confidence [0,1] CheckConstraint (DB-enforced) - xfail removed from test_confidence_outside_range_rejected_at_insert and test_evidence_shape_violation_propagates_as_typeerror — both now pass - TypeError removed from _SWALLOWED_EXCS fuzz list; test_intel_evidence_keys updated to assert the real provider key set
This commit is contained in:
@@ -69,19 +69,46 @@ class CommandEvidence(TypedDict):
|
||||
rule_pattern: str # regex source string, never user input
|
||||
|
||||
|
||||
class IntelEvidence(TypedDict):
|
||||
intel_uuid: str
|
||||
provider: Literal["abuseipdb", "greynoise", "feodo", "threatfox"]
|
||||
category: Optional[int]
|
||||
score: float # already normalized to [0.0, 1.0]
|
||||
class IntelEvidence(TypedDict, total=False):
|
||||
# AbuseIPDB
|
||||
abuseipdb_categories: list[int]
|
||||
abuseipdb_score: float
|
||||
abuse_confidence_score: int
|
||||
# GreyNoise
|
||||
greynoise_classification: str
|
||||
greynoise_tags: list[str]
|
||||
greynoise_name: str
|
||||
# Feodo
|
||||
feodo_listed: bool
|
||||
feodo_malware_family: str
|
||||
first_seen_feodo: str
|
||||
malware_family: str
|
||||
# ThreatFox
|
||||
threatfox_threat_types: list[str]
|
||||
threatfox_ioc_types: list[str]
|
||||
threatfox_malware_families: list[str]
|
||||
threat_types: list[str]
|
||||
malware_families: list[str]
|
||||
ioc_types: list[str]
|
||||
# Aggregate meta-rule
|
||||
aggregate_verdict: str
|
||||
bumped_rule_ids: list[str]
|
||||
|
||||
|
||||
class EmailEvidence(TypedDict):
|
||||
body_sha256: str # hash, never raw body
|
||||
class EmailEvidence(TypedDict, total=False):
|
||||
body_sha256: str # hash, never raw body
|
||||
matched_headers: list[str] # header NAMES, not values
|
||||
rcpt_domain_set: list[str] # domains, not addresses
|
||||
attachment_sha256s: list[str]
|
||||
rcpt_count: int
|
||||
# PII-safe match discriminators (subset of _EMAIL_EVIDENCE_ALLOWED_KEYS)
|
||||
matched_kit: str
|
||||
matched_trigger: str
|
||||
matched_url_host: str
|
||||
matched_signals: list[str]
|
||||
matched_subject_kw: list[str]
|
||||
matched_body_kw: list[str]
|
||||
encoded_byte_count: int
|
||||
|
||||
|
||||
class CanaryFingerprintEvidence(TypedDict):
|
||||
@@ -98,6 +125,18 @@ class HttpFingerprintEvidence(TypedDict):
|
||||
raw: Optional[dict] # raw settings dict for h2_settings / h3_settings
|
||||
|
||||
|
||||
# Maps source_kind → its evidence TypedDict. Used by TolerantTagger to
|
||||
# validate that lifters do not emit undeclared keys (programmer error →
|
||||
# TypeError, not the swallowed absence-of-data case).
|
||||
EVIDENCE_SCHEMA: dict[str, type] = {
|
||||
"command": CommandEvidence,
|
||||
"intel": IntelEvidence,
|
||||
"email": EmailEvidence,
|
||||
"canary_fingerprint": CanaryFingerprintEvidence,
|
||||
"http_fingerprint": HttpFingerprintEvidence,
|
||||
}
|
||||
|
||||
|
||||
# ── Tables ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -176,6 +215,10 @@ class TTPTag(SQLModel, table=True):
|
||||
"attacker_uuid IS NOT NULL OR identity_uuid IS NOT NULL",
|
||||
name="ttp_tag_has_anchor",
|
||||
),
|
||||
CheckConstraint(
|
||||
"confidence >= 0.0 AND confidence <= 1.0",
|
||||
name="ttp_tag_confidence_range",
|
||||
),
|
||||
Index(
|
||||
"ix_ttp_tag_identity_technique",
|
||||
"identity_uuid",
|
||||
|
||||
Reference in New Issue
Block a user