Files
DECNET/decnet/ttp/impl/intel_lifter.py
anti 75ff0ede1f fix(ttp): correct intel_lifter mappings + repoint ThreatFox to threat_type
Three bug classes uncovered by the 2026-05-02 ship-time audit:

* AbuseIPDB code/name mismatch in v1: cat 10 was treated as DDoS (it's
  Web Spam — DDoS is cat 4, intentionally unmapped per A.10) and cat 17
  as VPN IP (it's Spoofing — VPN IP is cat 13). Both typos mirrored in
  code AND the design doc Appendix A.10. Code now matches the AbuseIPDB
  taxonomy exactly; cat 17 retargets to T1566 (email-spoofing as a
  phishing precursor), and cats 7 (Phishing) and 16 (SQL Injection)
  pick up T1566 / T1190 emissions that v1 didn't cover.

* ThreatFox dispatch keyed on `ioc_type` in v1, but `ioc_type` is the
  indicator format (url / domain / hash variants) and carries no ATT&CK
  signal. The canonical taxonomy field per ThreatFox's API is
  `threat_type` (botnet_cc / payload_delivery / payload / cc_skimming).
  Repoint dispatch through the new `threatfox_threat_types` payload
  field; `ioc_type` rides as evidence only. Also adds the missing
  cc_skimming -> T1056 (Input Capture) mapping and registers T1056 in
  attack_catalog.py.

* GreyNoise bare-malicious lane: a `classification == "malicious"` row
  with no recognised tag used to emit nothing. Now lights T1071 at a
  half multiplier, suppressed when a tag already fires T1071 to avoid
  double-stamping at conflicting confidence levels.
2026-05-02 18:08:48 -04:00

385 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Intel lifter — opportunistic third-party verdict translator (E.3.10).
Reads ``AttackerIntel``-derived payload fields and emits ATT&CK
techniques per Appendix A.10 with per-provider confidence scaling.
Decoupling rule (design doc §"Decoupling: bus-driven, never a hard
dependency", enforced statically by E.2.7): this module imports
NOTHING from ``decnet.intel.{abuseipdb,greynoise,feodo,threatfox}`` —
only ``decnet.web.db.models`` symbols are permitted via ``TTPTag``.
Per-provider null tolerance is the steady state: a fresh attacker with
no intel row yet produces zero tags. A populated AbuseIPDB column with
no GreyNoise still fires AbuseIPDB-driven rules; the lifter never
waits for cross-provider corroboration as a precondition (the
:func:`~decnet.ttp.impl._state.is_active` check + per-rule predicate
gate emission, not provider count).
"""
from __future__ import annotations
from collections.abc import Callable
from typing import Any, Final
from decnet.ttp.base import TaggerEvent, TolerantTagger
from decnet.ttp.impl._emit import emit_tags
from decnet.ttp.impl._rule_index import RuleIndex
from decnet.ttp.impl._state import apply_ceiling, is_active
from decnet.ttp.impl.rule_engine import _ATTACK_RELEASE, CompiledRule
from decnet.ttp.store.base import RuleStore
from decnet.web.db.models.ttp import TTPTag, compute_tag_uuid
# AbuseIPDB category code → technique_ids emitted for that category,
# following TTP_TAGGING.md Appendix A.10 (post 2026-05-02 ship-time
# audit). The trailing comment on each row is AbuseIPDB's canonical
# category name from https://www.abuseipdb.com/categories, kept verbatim
# so the next quarterly drift check (per DEBT.md) can diff cheaply.
#
# Intentionally unmapped: cat 4 (DDoS Attack), cat 10 (Web Spam) and
# cat 12 (Blog Spam). Design doc §A.10 marks DDoS-without-protocol as
# too muddy for v0, and CMS spam has no clean ATT&CK fit at the IP layer.
_ABUSEIPDB_CATEGORY_TO_TECHNIQUES: Final[dict[int, frozenset[str]]] = {
    5: frozenset({"T1110"}),             # FTP Brute-Force
    7: frozenset({"T1566"}),             # Phishing
    9: frozenset({"T1090"}),             # Open Proxy
    11: frozenset({"T1496", "T1566"}),   # Email Spam (T1566 high-score only)
    13: frozenset({"T1090"}),            # VPN IP
    14: frozenset({"T1046", "T1595"}),   # Port Scan
    15: frozenset({"T1190"}),            # Hacking
    16: frozenset({"T1190"}),            # SQL Injection
    17: frozenset({"T1566"}),            # Spoofing (email-sender)
    18: frozenset({"T1110"}),            # Brute-Force
    19: frozenset({"T1595"}),            # Bad Web Bot
    20: frozenset({"T1078"}),            # Exploited Host
    21: frozenset({"T1190"}),            # Web App Attack
    22: frozenset({"T1110"}),            # SSH
    23: frozenset({"T1190"}),            # IoT Targeted
}

# Score gates, keyed category → technique → minimum confidence score.
# A gated technique fires for its category only when the score reaches
# the threshold (per A.10: "11 — Email Spam (high score, ≥80) → T1566").
_ABUSEIPDB_HIGH_SCORE_GATED: Final[dict[int, dict[str, int]]] = {
    11: {"T1566": 80},
}
# GreyNoise tag → technique_ids that tag warrants. The Community
# endpoint does not return tags today, so these rows only fire once an
# operator wires a non-Community provider that does. The table is kept
# canonical so that upgrade is a column populate, not a code change.
_GREYNOISE_TAG_TO_TECHNIQUES: Final[dict[str, frozenset[str]]] = {
    "tor_exit_node": frozenset({"T1090"}),
    "ssh_bruteforcer": frozenset({"T1110"}),
    "web_crawler": frozenset({"T1595"}),
    "cobalt_strike": frozenset({"T1071", "T1588"}),
    "metasploit": frozenset({"T1071", "T1588"}),
    "sliver": frozenset({"T1071", "T1588"}),
    "havoc": frozenset({"T1071", "T1588"}),
}

# Multiplier applied when GreyNoise says ``classification ==
# "malicious"`` but supplies no tag we recognise. The bare verdict is
# real signal, just weaker than a specific tag — half confidence keeps
# the floor honest.
_GREYNOISE_MALICIOUS_BARE_MULT: Final[float] = 0.5
# ThreatFox ``threat_type`` → technique_ids. The key is deliberately
# NOT ``ioc_type`` (that was the v1 ship-time bug): per ThreatFox's API
# the canonical taxonomy field is ``threat_type`` ∈ {botnet_cc,
# payload_delivery, payload, cc_skimming}, whereas ``ioc_type`` is only
# the indicator format (url, domain, md5_hash, …) and carries no ATT&CK
# signal.
_THREATFOX_THREAT_TYPE_TO_TECHNIQUES: Final[dict[str, frozenset[str]]] = {
    "botnet_cc": frozenset({"T1071", "T1588"}),
    "payload_delivery": frozenset({"T1105", "T1588"}),
    "payload": frozenset({"T1588"}),
    "cc_skimming": frozenset({"T1056"}),
}
# Shapes shared by every provider predicate in this module.
#
# ``EmitDecision`` is a list of ``(technique_id, confidence_multiplier,
# evidence_extra)`` tuples — one per emit slot the rule should fire for
# the payload. An empty list means the rule does not fire.
#
# ``Predicate`` is invoked as ``predicate(match_spec, payload)`` (see
# ``IntelLifter._tag_impl``) and returns an ``EmitDecision``.
EmitDecision = list[tuple[str, float, dict[str, Any]]]
Predicate = Callable[[dict[str, Any], dict[str, Any]], EmitDecision]
def _abuseipdb_decisions(
    _spec: dict[str, Any], payload: dict[str, Any],
) -> EmitDecision:
    """Decide AbuseIPDB-driven emissions for one payload.

    Reads ``abuseipdb_score`` and ``abuseipdb_categories`` (falling back
    to ``categories``). Each recognised category contributes the
    techniques mapped in ``_ABUSEIPDB_CATEGORY_TO_TECHNIQUES``; a
    technique listed in ``_ABUSEIPDB_HIGH_SCORE_GATED`` for that category
    only fires once the score reaches its gate. Every fired technique
    shares one multiplier: ``score / 100`` clamped to ``[0.0, 1.0]``.

    Args:
        _spec: Rule match spec; unused by this predicate.
        payload: Event payload carrying the AbuseIPDB-derived fields.

    Returns:
        One ``(technique_id, multiplier, evidence)`` tuple per fired
        technique, where evidence lists the categories that triggered it
        and the integer score. Empty when the score is missing, not a
        real number, or non-finite; when no integer category is present;
        or when no category maps to a technique.
    """
    import math  # local import: only the finiteness guard below needs it

    score = payload.get("abuseipdb_score")
    # ``bool`` subclasses ``int``; a stray True/False is not a score.
    if isinstance(score, bool) or not isinstance(score, (int, float)):
        return []
    # NaN slips past every ``<`` gate and ``int(nan)`` / ``int(inf)``
    # raise, so a non-finite score is treated like a missing one.
    if isinstance(score, float) and not math.isfinite(score):
        return []

    categories_raw = (
        payload.get("abuseipdb_categories") or payload.get("categories")
    )
    if not isinstance(categories_raw, list) or not categories_raw:
        return []
    # Keep integer codes only, de-duplicated in first-seen order so a
    # repeated report does not repeat itself in the evidence.
    categories: list[int] = list(dict.fromkeys(
        c for c in categories_raw
        if isinstance(c, int) and not isinstance(c, bool)
    ))
    if not categories:
        return []

    # The provider documents the score as 0–100; clamp so out-of-range
    # input cannot push the multiplier outside [0.0, 1.0].
    bounded = min(max(score, 0), 100)

    # technique_id → categories that triggered it.
    triggered: dict[str, list[int]] = {}
    for cat in categories:
        gates = _ABUSEIPDB_HIGH_SCORE_GATED.get(cat, {})
        for tech in _ABUSEIPDB_CATEGORY_TO_TECHNIQUES.get(cat, frozenset()):
            gate = gates.get(tech)
            if gate is not None and bounded < gate:
                continue
            triggered.setdefault(tech, []).append(cat)
    if not triggered:
        return []

    multiplier = float(bounded) / 100.0
    return [
        (tech, multiplier, {
            "abuseipdb_categories": cats,
            # Evidence records the score as received, not the clamp.
            "abuse_confidence_score": int(score),
        })
        for tech, cats in triggered.items()
    ]
def _greynoise_decisions(
    _spec: dict[str, Any], payload: dict[str, Any],
) -> EmitDecision:
    """Turn a GreyNoise verdict into per-technique emit decisions.

    Signals are evaluated in this order:

    1. ``classification == "scanner"`` fires T1595 at 1.0×. Kept for
       non-Community provider plans that surface this verdict; the
       Community endpoint reports {malicious, benign, suspicious,
       unknown} only.
    2. Every string entry of ``greynoise_tags`` found in
       ``_GREYNOISE_TAG_TO_TECHNIQUES`` fires its mapped techniques at
       1.0×.
    3. ``classification == "malicious"`` fires T1071 at
       ``_GREYNOISE_MALICIOUS_BARE_MULT`` unless a tag already fired
       T1071, which avoids double-stamping the technique.

    When several signals hit one technique, the largest multiplier wins
    and every signal is listed in that technique's evidence.

    Args:
        _spec: Rule match spec; unused by this predicate.
        payload: Event payload carrying the GreyNoise-derived fields.

    Returns:
        One ``(technique_id, multiplier, evidence)`` tuple per fired
        technique, in first-fired order; empty when nothing fires.
    """
    verdict = payload.get("greynoise_classification")
    raw_tags = payload.get("greynoise_tags") or []

    # Two parallel maps keyed by technique_id: the strongest multiplier
    # seen so far, and every signal that hit the technique.
    strongest: dict[str, float] = {}
    hit_signals: dict[str, list[str]] = {}

    def _note(technique: str, weight: float, signal: str) -> None:
        if technique not in strongest:
            strongest[technique] = weight
            hit_signals[technique] = [signal]
            return
        hit_signals[technique].append(signal)
        if weight > strongest[technique]:
            strongest[technique] = weight

    if verdict == "scanner":
        _note("T1595", 1.0, "scanner")

    if isinstance(raw_tags, list):
        for tag in raw_tags:
            if isinstance(tag, str):
                for technique in _GREYNOISE_TAG_TO_TECHNIQUES.get(
                    tag, frozenset()
                ):
                    _note(technique, 1.0, tag)

    if verdict == "malicious" and "T1071" not in strongest:
        _note("T1071", _GREYNOISE_MALICIOUS_BARE_MULT, "malicious")

    return [
        (technique, strongest[technique], {
            "greynoise_classification": verdict,
            "greynoise_tags": hit_signals[technique],
        })
        for technique in strongest
    ]
def _feodo_decisions(
    _spec: dict[str, Any], payload: dict[str, Any],
) -> EmitDecision:
    """Decide Feodo Tracker emissions for one payload.

    Fires only when ``feodo_listed`` is exactly ``True``. A hit emits
    both T1071 and T1588 at a 1.0× multiplier. The malware family is
    read from ``feodo_malware_family`` (falling back to
    ``malware_family``) and added to the evidence when it is a non-empty
    string.

    Args:
        _spec: Rule match spec; unused by this predicate.
        payload: Event payload carrying the Feodo-derived fields.

    Returns:
        Two ``(technique_id, 1.0, evidence)`` tuples on a hit, otherwise
        an empty list. Each tuple owns its evidence dict.
    """
    if payload.get("feodo_listed") is not True:
        return []

    family = (
        payload.get("feodo_malware_family")
        or payload.get("malware_family")
    )
    extra: dict[str, Any] = {"feodo_listed": True}
    if isinstance(family, str) and family:
        extra["malware_family"] = family

    # Copy per decision: handing the same dict to both techniques would
    # let a consumer that mutates one decision's evidence silently
    # change the other's.
    return [(technique, 1.0, dict(extra)) for technique in ("T1071", "T1588")]
def _threatfox_decisions(
    _spec: dict[str, Any], payload: dict[str, Any],
) -> EmitDecision:
    """Decide ThreatFox emissions, dispatching on ``threat_type``.

    Dispatch reads ``threatfox_threat_types`` (list, preferred — comes
    from the bus payload built by the intel worker) and falls back to a
    singular ``threat_type`` string for legacy callers and tests.
    ``ioc_type`` is never a dispatch key — the v1 ship-time mapping had
    that backwards; ``threatfox_ioc_types`` only rides along as evidence.

    Malware families come from ``threatfox_malware_families`` (list) or a
    singular ``malware_family`` string. Threat types, families and ioc
    types are each de-duplicated in first-seen order before they are
    written to evidence.

    The lifter is tolerant by contract: missing or malformed inputs
    produce zero emissions, never an error.

    Args:
        _spec: Rule match spec; unused by this predicate.
        payload: Event payload carrying the ThreatFox-derived fields.

    Returns:
        One ``(technique_id, 1.0, evidence)`` tuple per fired technique;
        empty when no recognised threat type is present. Each tuple owns
        its evidence lists.
    """
    def _str_list(raw: Any) -> list[str]:
        # Normalise a list (or one non-empty string) to unique strings,
        # preserving first-seen order.
        if isinstance(raw, list):
            return list(dict.fromkeys(s for s in raw if isinstance(s, str)))
        if isinstance(raw, str) and raw:
            return [raw]
        return []

    threat_types = _str_list(
        payload.get("threatfox_threat_types")
        or payload.get("threat_type")
    )

    # technique_id → threat types that triggered it.
    triggered: dict[str, list[str]] = {}
    for tt in threat_types:
        for tech in _THREATFOX_THREAT_TYPE_TO_TECHNIQUES.get(tt, frozenset()):
            triggered.setdefault(tech, []).append(tt)
    if not triggered:
        return []

    families = _str_list(
        payload.get("threatfox_malware_families")
        or payload.get("malware_family")
    )
    # Only the list form is accepted for ioc types; anything else is
    # ignored.
    ioc_types_raw = payload.get("threatfox_ioc_types")
    ioc_types = (
        _str_list(ioc_types_raw) if isinstance(ioc_types_raw, list) else []
    )

    # Fresh list copies per decision so evidence dicts never alias.
    return [
        (tech, 1.0, {
            "threat_types": signals,
            **({"malware_families": list(families)} if families else {}),
            **({"ioc_types": list(ioc_types)} if ioc_types else {}),
        })
        for tech, signals in triggered.items()
    ]
def _aggregate_bump_decisions(
    _spec: dict[str, Any], _payload: dict[str, Any],
) -> EmitDecision:
    """Placeholder predicate for the bump-only meta-rule R0058.

    R0058 (TTP_TAGGING.md §"Initial rule pack" R0058 + commit b819dfe
    note: confidence < 0.3 drops at the repo layer) is meant to bump
    existing tags. That needs cross-tag access the current TaggerEvent
    contract doesn't provide, so the work is deferred to the E.3.14
    worker bootstrap and R0058 is a no-op in v0.

    Returns:
        Always an empty list; both arguments are ignored.
    """
    no_decisions: EmitDecision = []
    return no_decisions
# Dispatch table: a rule's ``match_spec["kind"]`` → the predicate that
# decides its emissions. Kinds absent from this table are skipped by
# ``IntelLifter._tag_impl``.
_PREDICATES: Final[dict[str, Predicate]] = {
    "lifter:intel_abuseipdb": _abuseipdb_decisions,
    "lifter:intel_greynoise": _greynoise_decisions,
    "lifter:intel_feodo": _feodo_decisions,
    "lifter:intel_threatfox": _threatfox_decisions,
    "lifter:intel_aggregate_bump": _aggregate_bump_decisions,
}
class IntelLifter(TolerantTagger):
    """Tagger that lifts third-party intel payloads into ATT&CK tags.

    Only rules whose ``match_spec["kind"]`` starts with ``OWNED_PREFIX``
    are indexed. For each event, every indexed rule that applies to the
    event's source kind and is active gets its predicate (looked up in
    ``_PREDICATES``) run against the payload; any decisions it returns
    are fanned out through ``_emit_filtered``.
    """

    name = "intel"
    HANDLES = frozenset({"intel"})
    OWNED_PREFIX: Final[str] = "lifter:intel_"

    def __init__(self, store: RuleStore) -> None:
        """Keep *store* for later watching and start with a new RuleIndex."""
        self._store = store
        self._index = RuleIndex()

    @classmethod
    def _owns(cls, rule: CompiledRule) -> bool:
        """Return True when *rule*'s kind is a string under ``OWNED_PREFIX``."""
        kind = rule.match_spec.get("kind", "")
        if not isinstance(kind, str):
            return False
        return kind.startswith(cls.OWNED_PREFIX)

    async def watch_store(self) -> None:
        """Watch the rule store through the index, filtered by ``_owns``."""
        await self._index.watch(self._store, predicate=self._owns)

    async def _tag_impl(self, event: TaggerEvent) -> list[TTPTag]:
        """Run every applicable, active intel rule against *event*.

        Returns the tags produced across all rules, in index order.
        """
        tags: list[TTPTag] = []
        for rule in self._index.values():
            # Same short-circuit order as before: source kind first,
            # then rule state.
            if not (
                event.source_kind in rule.applies_to
                and is_active(rule.state)
            ):
                continue
            predicate = _PREDICATES.get(rule.match_spec.get("kind", ""))
            if predicate is None:
                continue
            decisions = predicate(rule.match_spec, event.payload)
            if decisions:
                tags.extend(_emit_filtered(rule, event, decisions))
        return tags
def _emit_filtered(
    rule: CompiledRule,
    event: TaggerEvent,
    decisions: EmitDecision,
) -> list[TTPTag]:
    """Build tags for the ``rule.emits`` entries the predicate selected.

    A rule's YAML may declare ``emits=[T1110, T1190, T1566]`` (the
    universe of possible emissions); *decisions* names the subset that
    actually fires for this payload. Dispatch is therefore signal-driven
    rather than regex-driven.

    For every emit whose technique_id has a decision, the base
    confidence is multiplied by the decision's multiplier and passed
    through ``apply_ceiling`` with the rule state. The tag's evidence is
    the payload values of ``rule.evidence_fields`` that are present,
    overlaid with the decision's extras.

    Args:
        rule: Compiled rule providing emits, evidence fields, state, id
            and version.
        event: Event providing the payload and the identifiers stamped
            onto each tag.
        decisions: ``(technique_id, multiplier, evidence_extra)`` tuples;
            if a technique repeats, the last tuple wins.

    Returns:
        One ``TTPTag`` per matching emit, in ``rule.emits`` order.
    """
    # technique_id → (multiplier, evidence extras); later entries
    # overwrite earlier ones for the same technique.
    selected: dict[str, tuple[float, dict[str, Any]]] = {}
    for tech, mult, extras in decisions:
        selected[tech] = (mult, extras)

    # Evidence common to every tag: only fields the payload carries.
    shared_evidence: dict[str, Any] = {
        name: event.payload.get(name)
        for name in rule.evidence_fields
        if name in event.payload
    }

    tags: list[TTPTag] = []
    for technique_id, sub_technique_id, tactic, base_conf in rule.emits:
        decision = selected.get(technique_id)
        if decision is None:
            continue
        multiplier, extras = decision
        # Extras win over shared fields on key collisions.
        evidence = {**shared_evidence, **extras}
        confidence = apply_ceiling(base_conf * multiplier, rule.state)
        tag_uuid = compute_tag_uuid(
            source_kind=event.source_kind,
            source_id=event.source_id,
            rule_id=rule.rule_id,
            rule_version=rule.rule_version,
            technique_id=technique_id,
            sub_technique_id=sub_technique_id,
        )
        tags.append(TTPTag(
            uuid=tag_uuid,
            source_kind=event.source_kind,
            source_id=event.source_id,
            attacker_uuid=event.attacker_uuid,
            identity_uuid=event.identity_uuid,
            session_id=event.session_id,
            decky_id=event.decky_id,
            tactic=tactic,
            technique_id=technique_id,
            sub_technique_id=sub_technique_id,
            confidence=confidence,
            rule_id=rule.rule_id,
            rule_version=rule.rule_version,
            evidence=evidence,
            attack_release=_ATTACK_RELEASE,
        ))
    return tags
# Public surface of this module: only the tagger class is exported.
__all__ = ["IntelLifter"]
# Suppress unused-import lint; emit_tags is exposed for parity with the
# other lifters even though IntelLifter uses _emit_filtered. Leave the
# import present so future refactors that consolidate emission paths
# don't have to re-add it.
_ = emit_tags