feat(ttp): extract intel_lifter provider mappings to YAML data + ATT&CK external_reference enrichment

The four provider→technique tables (AbuseIPDB cat→techniques,
GreyNoise tag→techniques, ThreatFox threat_type→techniques, plus
the Feodo binary-listed signal) used to live as Final[dict] constants
in intel_lifter.py. Two real problems with that:

1. Drift between rules/ttp/R0054.yaml..R0058.yaml (which declare
   the full slate per provider) and the Python dicts (which decide
   which slate-member fires per signal). The v2 audit comment in
   intel_lifter.py documented that they had silently drifted.
2. No ATT&CK provenance on emissions — the loaded STIX bundle has
   rich external_references (canonical attack.mitre.org URLs) that
   never surfaced because the lifter had no path back to them.

Mappings now live as YAML at decnet/ttp/data/intel/{provider}.yaml,
validated at load against the loaded ATT&CK bundle, with each entry
enriched by attack_stix._attack_pattern_by_id to attach the canonical
MITRE URL to every emission.

- decnet/ttp/data/intel_loader.py: pydantic-validated schema +
  ProviderMapping/Signal/TechniqueEmission frozen dataclasses +
  load_provider_mapping(provider) lru-cached.
- Per-technique high_score_threshold inlined into YAML
  (collapses the separate _ABUSEIPDB_HIGH_SCORE_GATED dict).
- external_reference field follows the STIX 2.1 external-reference
  shape (source_name + url + optional external_id) so the future
  STIX/MISP exporter is a direct translation.
- intel_lifter.py: dicts deleted, decision functions read from
  ProviderMapping accessors. Decision-flow constants (T1071/T1595
  bare-classification fallbacks in _greynoise_decisions) stay in
  code — they're not table rows.
- Each emit slot's evidence_extra now carries mitre_url for any
  technique resolved in the bundle (every one in practice).
- tests/ttp/test_intel_mappings.py: snapshot equivalence vs the
  legacy dicts, high-score gate behavior, every-signal-has-an-
  external-reference, every-emission-has-a-mitre-url, negative
  paths (unknown technique_id raises AttackBundleError, mismatched
  provider field rejected, dir listing matches expected providers).

The YAML schema + mitre_url enrichment lays groundwork for the
future STIX exporter; this commit does NOT build that exporter.
This commit is contained in:
2026-05-09 06:18:25 -04:00
parent a3f1cea2d6
commit d25f69ba1b
9 changed files with 853 additions and 95 deletions

View File

@@ -0,0 +1,6 @@
"""Data files used at runtime by the TTP layer.
See ``decnet/ttp/data/intel/`` for provider-signal → ATT&CK technique
mappings consumed by :mod:`decnet.ttp.impl.intel_lifter` via
:mod:`decnet.ttp.data.intel_loader`.
"""

View File

@@ -0,0 +1,8 @@
"""Per-provider intel-signal → ATT&CK technique mapping data.
One YAML file per intel provider (abuseipdb / greynoise / feodo /
threatfox), structured per the schema in
:mod:`decnet.ttp.data.intel_loader`. Each entry carries a STIX-shaped
``external_reference`` so the future STIX/MISP exporter can emit
relationship objects without a second mapping pass.
"""

View File

@@ -0,0 +1,125 @@
# AbuseIPDB category → ATT&CK technique mapping.
#
# Mirrors what _ABUSEIPDB_CATEGORY_TO_TECHNIQUES + _ABUSEIPDB_HIGH_SCORE_GATED
# used to encode in decnet/ttp/impl/intel_lifter.py before the data
# extraction. Source-of-truth column for which categories produce
# which ATT&CK tags, paired with rules/ttp/R0054.yaml which declares
# the full slate the predicate can emit.
#
# Cat 4 (DDoS), 10 (Web Spam), 12 (Blog Spam) are intentionally
# unmapped — design doc TTP_TAGGING.md §A.10: DDoS-without-protocol
# is too muddy for v0; CMS spam has no clean ATT&CK fit at the IP
# layer. Keep the explanatory comments here so the next quarterly
# drift check (development/DEBT.md DEBT-048) can diff cheaply.
provider: abuseipdb
mapping_version: "2"
attack_release: ">=15.1"
signals:
- id: cat_5
label: "FTP Brute-Force"
external_reference:
source_name: abuseipdb
url: "https://www.abuseipdb.com/categories#5"
techniques:
- technique_id: T1110
- id: cat_7
label: "Phishing"
external_reference:
source_name: abuseipdb
url: "https://www.abuseipdb.com/categories#7"
techniques:
- technique_id: T1566
- id: cat_9
label: "Open Proxy"
external_reference:
source_name: abuseipdb
url: "https://www.abuseipdb.com/categories#9"
techniques:
- technique_id: T1090
- id: cat_11
label: "Email Spam"
external_reference:
source_name: abuseipdb
url: "https://www.abuseipdb.com/categories#11"
techniques:
- technique_id: T1496
- technique_id: T1566
high_score_threshold: 80
- id: cat_13
label: "VPN IP"
external_reference:
source_name: abuseipdb
url: "https://www.abuseipdb.com/categories#13"
techniques:
- technique_id: T1090
- id: cat_14
label: "Port Scan"
external_reference:
source_name: abuseipdb
url: "https://www.abuseipdb.com/categories#14"
techniques:
- technique_id: T1046
- technique_id: T1595
- id: cat_15
label: "Hacking"
external_reference:
source_name: abuseipdb
url: "https://www.abuseipdb.com/categories#15"
techniques:
- technique_id: T1190
- id: cat_16
label: "SQL Injection"
external_reference:
source_name: abuseipdb
url: "https://www.abuseipdb.com/categories#16"
techniques:
- technique_id: T1190
- id: cat_17
label: "Spoofing"
external_reference:
source_name: abuseipdb
url: "https://www.abuseipdb.com/categories#17"
techniques:
- technique_id: T1566
- id: cat_18
label: "Brute-Force"
external_reference:
source_name: abuseipdb
url: "https://www.abuseipdb.com/categories#18"
techniques:
- technique_id: T1110
- id: cat_19
label: "Bad Web Bot"
external_reference:
source_name: abuseipdb
url: "https://www.abuseipdb.com/categories#19"
techniques:
- technique_id: T1595
- id: cat_20
label: "Exploited Host"
external_reference:
source_name: abuseipdb
url: "https://www.abuseipdb.com/categories#20"
techniques:
- technique_id: T1078
- id: cat_21
label: "Web App Attack"
external_reference:
source_name: abuseipdb
url: "https://www.abuseipdb.com/categories#21"
techniques:
- technique_id: T1190
- id: cat_22
label: "SSH"
external_reference:
source_name: abuseipdb
url: "https://www.abuseipdb.com/categories#22"
techniques:
- technique_id: T1110
- id: cat_23
label: "IoT Targeted"
external_reference:
source_name: abuseipdb
url: "https://www.abuseipdb.com/categories#23"
techniques:
- technique_id: T1190

View File

@@ -0,0 +1,20 @@
# Feodo Tracker → ATT&CK technique mapping.
#
# Feodo Tracker is a binary listed/not-listed feed; there are no
# per-signal subtypes to enumerate. Both T1071 (Application Layer
# Protocol) and T1588 (Obtain Capabilities) fire whenever an attacker
# IP is on the Feodo blocklist. Keeping this as a single ``feodo_listed``
# signal preserves the structured-mapping shape for the future
# STIX/MISP exporter without inventing fake categories.
provider: feodo
mapping_version: "1"
attack_release: ">=15.1"
signals:
- id: feodo_listed
label: "Listed on Feodo Tracker"
external_reference:
source_name: feodo
url: "https://feodotracker.abuse.ch/about/"
techniques:
- technique_id: T1071
- technique_id: T1588

View File

@@ -0,0 +1,74 @@
# GreyNoise tag → ATT&CK technique mapping.
#
# Mirrors what _GREYNOISE_TAG_TO_TECHNIQUES used to encode in
# decnet/ttp/impl/intel_lifter.py. Note: GreyNoise's Community
# endpoint does not return tags; these fire only when operators wire
# a non-Community provider (Visualizer / Enterprise / RIOT). Kept
# canonical here so the upgrade path is a column populate, not a
# code change. Decision-flow constants for bare ``classification ==
# "scanner"`` (T1595) and bare ``classification == "malicious"``
# (T1071 at 0.5×) stay in code — they're not table rows.
provider: greynoise
mapping_version: "1"
attack_release: ">=15.1"
signals:
- id: tor_exit_node
label: "Tor exit node"
external_reference:
source_name: greynoise
url: "https://docs.greynoise.io/docs/understanding-greynoise-tags"
external_id: tor_exit_node
techniques:
- technique_id: T1090
- id: ssh_bruteforcer
label: "SSH brute-forcer"
external_reference:
source_name: greynoise
url: "https://docs.greynoise.io/docs/understanding-greynoise-tags"
external_id: ssh_bruteforcer
techniques:
- technique_id: T1110
- id: web_crawler
label: "Web crawler"
external_reference:
source_name: greynoise
url: "https://docs.greynoise.io/docs/understanding-greynoise-tags"
external_id: web_crawler
techniques:
- technique_id: T1595
- id: cobalt_strike
label: "Cobalt Strike"
external_reference:
source_name: greynoise
url: "https://docs.greynoise.io/docs/understanding-greynoise-tags"
external_id: cobalt_strike
techniques:
- technique_id: T1071
- technique_id: T1588
- id: metasploit
label: "Metasploit"
external_reference:
source_name: greynoise
url: "https://docs.greynoise.io/docs/understanding-greynoise-tags"
external_id: metasploit
techniques:
- technique_id: T1071
- technique_id: T1588
- id: sliver
label: "Sliver"
external_reference:
source_name: greynoise
url: "https://docs.greynoise.io/docs/understanding-greynoise-tags"
external_id: sliver
techniques:
- technique_id: T1071
- technique_id: T1588
- id: havoc
label: "Havoc"
external_reference:
source_name: greynoise
url: "https://docs.greynoise.io/docs/understanding-greynoise-tags"
external_id: havoc
techniques:
- technique_id: T1071
- technique_id: T1588

View File

@@ -0,0 +1,45 @@
# ThreatFox threat_type → ATT&CK technique mapping.
#
# Mirrors _THREATFOX_THREAT_TYPE_TO_TECHNIQUES from
# decnet/ttp/impl/intel_lifter.py. ThreatFox's canonical taxonomy is
# the ``threat_type`` field (NOT ``ioc_type`` — that was the v1
# ship-time bug). ``ioc_type`` is the indicator format (url, domain,
# md5_hash, …) and carries no ATT&CK signal.
provider: threatfox
mapping_version: "1"
attack_release: ">=15.1"
signals:
- id: botnet_cc
label: "Botnet C2"
external_reference:
source_name: threatfox
url: "https://threatfox.abuse.ch/faq/"
external_id: botnet_cc
techniques:
- technique_id: T1071
- technique_id: T1588
- id: payload_delivery
label: "Payload delivery"
external_reference:
source_name: threatfox
url: "https://threatfox.abuse.ch/faq/"
external_id: payload_delivery
techniques:
- technique_id: T1105
- technique_id: T1588
- id: payload
label: "Payload"
external_reference:
source_name: threatfox
url: "https://threatfox.abuse.ch/faq/"
external_id: payload
techniques:
- technique_id: T1588
- id: cc_skimming
label: "Credit-card skimming"
external_reference:
source_name: threatfox
url: "https://threatfox.abuse.ch/faq/"
external_id: cc_skimming
techniques:
- technique_id: T1056

View File

@@ -0,0 +1,229 @@
"""YAML-backed loader for intel-provider → ATT&CK technique mappings.
Replaces the ``_*_TO_TECHNIQUES`` ``Final[dict]`` tables that used to
live in :mod:`decnet.ttp.impl.intel_lifter`. Source-of-truth files
live under :mod:`decnet.ttp.data.intel` (one YAML per provider) and
are validated against the loaded ATT&CK STIX bundle at load time:
* every ``technique_id`` in every signal must resolve in the loaded
  bundle; this is enforced by
  :func:`decnet.ttp.attack_stix.assert_known_technique_ids`
* every emission is enriched with the canonical MITRE URL, taken from
  the technique's ``external_references`` entry whose ``source_name``
  is ``mitre-attack``, so the future STIX/MISP exporter can emit
  fully-resolved relationship objects without a second mapping pass
Design constraint: this module is the only place provider-mapping
schema knowledge lives. ``intel_lifter`` reads :class:`ProviderMapping`
accessors and never touches the dicts directly.
"""
from __future__ import annotations

from dataclasses import dataclass, field
from functools import lru_cache
from pathlib import Path
from typing import Any

import yaml
from pydantic import BaseModel, ConfigDict, Field

from decnet.ttp import attack_stix

# Directory holding one ``{provider}.yaml`` mapping file per intel provider;
# resolved relative to this module so it works regardless of the CWD.
_DATA_DIR: Path = Path(__file__).parent.joinpath("intel")
# ─── YAML schema (pydantic v2) ─────────────────────────────────────
class ExternalReference(BaseModel):
    """STIX 2.1 ``external-reference`` shape — kept faithful so the
    future STIX exporter is a direct translation."""
    # Instances are frozen: fields cannot be reassigned after validation.
    model_config = ConfigDict(frozen=True)
    # Name of the source defining the reference (the bundled YAML files
    # use the provider name here, e.g. ``abuseipdb``).
    source_name: str
    # Link to the source's own description of the referenced item.
    url: str
    # Optional identifier of the item within the source; None when the
    # YAML entry omits it.
    external_id: str | None = None
class TechniqueEntry(BaseModel):
    """One ``techniques:`` list item of a signal in a provider mapping YAML."""
    # Instances are frozen: fields cannot be reassigned after validation.
    model_config = ConfigDict(frozen=True)
    # ATT&CK technique id as written in the YAML (e.g. ``T1110``).
    technique_id: str
    # Per-technique gate: emission only fires when an upstream
    # confidence score (e.g. AbuseIPDB ``abuseConfidenceScore``)
    # meets or exceeds this floor. None = always fire.
    high_score_threshold: int | None = None
class SignalEntry(BaseModel):
    """One ``signals:`` list item of a provider mapping YAML."""
    # Instances are frozen: fields cannot be reassigned after validation.
    model_config = ConfigDict(frozen=True)
    # Provider-local signal key (e.g. ``cat_5``, ``tor_exit_node``).
    id: str
    # Human-readable name of the signal.
    label: str
    # Where the provider documents this signal.
    external_reference: ExternalReference
    # Techniques this signal maps to, in YAML order.
    techniques: tuple[TechniqueEntry, ...]
    # Defaults to 1.0 when the YAML omits it.
    # NOTE(review): presumably scales the confidence of emissions from this
    # signal in the lifter — not used in this module; confirm against caller.
    confidence_multiplier: float = 1.0
class ProviderMappingFile(BaseModel):
    """Top-level schema of a ``{provider}.yaml`` mapping file."""
    # Instances are frozen: fields cannot be reassigned after validation.
    model_config = ConfigDict(frozen=True)
    # Provider name; the loader rejects the file when this does not match
    # the name it was asked to load.
    provider: str
    # Version label of the mapping data itself (a string in the YAML).
    mapping_version: str
    # Required field (no default); carried as free text, e.g. ``">=15.1"``.
    attack_release: str = Field(
        description="Minimum ATT&CK release this mapping is known-correct against."
    )
    # All signals declared for the provider, in YAML order.
    signals: tuple[SignalEntry, ...]
# ─── Runtime accessor objects ──────────────────────────────────────
@dataclass(frozen=True)
class TechniqueEmission:
    """A single emit slot for a (signal, technique) pair, enriched with the canonical MITRE URL."""
    # ATT&CK technique id copied from the YAML entry.
    technique_id: str
    # Minimum upstream score required for this emission; None = ungated.
    high_score_threshold: int | None
    # URL from the technique's ``mitre-attack`` external reference, or None
    # when the lookup found none.
    mitre_url: str | None
@dataclass(frozen=True)
class Signal:
    """Runtime view of one provider signal together with its emissions.

    Built by the loader from a validated ``SignalEntry``; the fields mirror
    that entry, with ``techniques`` replaced by enriched ``emissions``.
    """

    id: str
    label: str
    external_reference: ExternalReference
    emissions: tuple[TechniqueEmission, ...]
    confidence_multiplier: float

    def technique_ids(self) -> frozenset[str]:
        """Return the distinct technique ids across this signal's emissions."""
        distinct = {emission.technique_id for emission in self.emissions}
        return frozenset(distinct)
@dataclass(frozen=True)
class ProviderMapping:
    """Validated, enriched mapping for one intel provider.

    ``signals`` is the source of truth; ``_by_id`` is a lookup index derived
    from it by the loader.
    """

    provider: str
    mapping_version: str
    signals: tuple[Signal, ...]
    # Derived index of ``signals`` keyed by signal id. Excluded from
    # comparison (and therefore from the generated ``__hash__``) because a
    # dict is unhashable and would make ``hash(mapping)`` raise TypeError;
    # excluded from repr because it only duplicates ``signals``.
    _by_id: dict[str, Signal] = field(compare=False, repr=False)

    def get(self, signal_id: str) -> Signal | None:
        """Return the signal registered under ``signal_id``, or None."""
        return self._by_id.get(signal_id)

    def techniques_for_signal(
        self, signal_id: str, *, score: float | None = None
    ) -> frozenset[TechniqueEmission]:
        """Emissions a given signal produces, filtered by ``score``-vs-threshold gate.

        ``score`` is the upstream confidence (e.g. AbuseIPDB
        ``abuseConfidenceScore`` 0-100). If a technique has a
        ``high_score_threshold`` and ``score`` is below it (or
        unknown), that technique is filtered out. Mirrors the legacy
        ``_ABUSEIPDB_HIGH_SCORE_GATED`` semantics.

        Returns an empty frozenset for an unknown ``signal_id``.
        """
        sig = self._by_id.get(signal_id)
        if sig is None:
            return frozenset()
        return frozenset(
            emission
            for emission in sig.emissions
            # Ungated emissions always pass; gated ones need a known score
            # at or above the floor.
            if emission.high_score_threshold is None
            or (score is not None and score >= emission.high_score_threshold)
        )

    def all_technique_ids(self) -> frozenset[str]:
        """Return every technique id referenced by any signal, ignoring gates."""
        return frozenset(
            e.technique_id for sig in self.signals for e in sig.emissions
        )

    def signal_ids(self) -> frozenset[str]:
        """Return the ids of all indexed signals."""
        return frozenset(self._by_id)
# ─── Loader ────────────────────────────────────────────────────────
def _mitre_url_for(technique_id: str) -> str | None:
    """Look up the MITRE ATT&CK URL recorded for ``technique_id``.

    Fetches the technique object through
    ``attack_stix._attack_pattern_by_id`` and returns the ``url`` of the
    first entry in its ``external_references`` whose ``source_name`` is
    ``mitre-attack``. Returns None when the technique is not found, when no
    such reference exists, or when that reference carries no ``url``.
    """
    pattern = attack_stix._attack_pattern_by_id(technique_id)
    if pattern is None:
        return None
    references = pattern.get("external_references", [])
    return next(
        (
            reference.get("url")
            for reference in references
            if reference.get("source_name") == "mitre-attack"
        ),
        None,
    )
def _data_path(provider: str) -> Path:
    """Return the location of ``{provider}.yaml`` inside the mapping data directory."""
    filename = f"{provider}.yaml"
    return _DATA_DIR.joinpath(filename)
@lru_cache(maxsize=8)
def load_provider_mapping(provider: str) -> ProviderMapping:
    """Load + validate + enrich a provider's mapping YAML. Cached process-wide.

    Steps: read ``{provider}.yaml`` from the data directory, validate it
    against :class:`ProviderMappingFile`, check that every referenced
    technique id is known to the ATT&CK bundle, attach the MITRE URL to each
    emission, and index the signals by id.

    Args:
        provider: Provider name; also the YAML file's stem.

    Returns:
        The enriched :class:`ProviderMapping`.

    Raises:
        FileNotFoundError: no mapping file exists for ``provider``.
        ValueError: the file's ``provider`` field does not match
            ``provider``, or two signals share the same id.

    Errors from schema validation and from
    ``attack_stix.assert_known_technique_ids`` propagate unchanged.
    """
    path = _data_path(provider)
    if not path.is_file():
        raise FileNotFoundError(
            f"intel mapping for provider {provider!r} not found at {path}"
        )
    raw: Any = yaml.safe_load(path.read_text(encoding="utf-8"))
    parsed = ProviderMappingFile.model_validate(raw)
    if parsed.provider != provider:
        raise ValueError(
            f"{path}: provider field {parsed.provider!r} does not match "
            f"filename {provider!r}"
        )
    # Validate every technique resolves in the loaded ATT&CK bundle.
    all_ids = sorted(
        {t.technique_id for s in parsed.signals for t in s.techniques}
    )
    attack_stix.assert_known_technique_ids(
        all_ids, source=f"decnet/ttp/data/intel/{provider}.yaml"
    )
    signals: list[Signal] = [
        Signal(
            id=s.id,
            label=s.label,
            external_reference=s.external_reference,
            emissions=tuple(
                TechniqueEmission(
                    technique_id=t.technique_id,
                    high_score_threshold=t.high_score_threshold,
                    mitre_url=_mitre_url_for(t.technique_id),
                )
                for t in s.techniques
            ),
            confidence_multiplier=s.confidence_multiplier,
        )
        for s in parsed.signals
    ]
    # Build the id index and collect repeated ids in the same pass. Counting
    # occurrences in the finished dict's keys can never find a duplicate,
    # because dict keys are already unique.
    by_id: dict[str, Signal] = {}
    dupes: list[str] = []
    for sig in signals:
        if sig.id in by_id:
            if sig.id not in dupes:
                dupes.append(sig.id)
            continue
        by_id[sig.id] = sig
    if dupes:
        raise ValueError(f"{path}: duplicate signal ids: {dupes}")
    return ProviderMapping(
        provider=parsed.provider,
        mapping_version=parsed.mapping_version,
        signals=tuple(signals),
        _by_id=by_id,
    )
def clear_cache() -> None:
    """Test-only knob: forget every cached :class:`ProviderMapping`.

    Empties the ``lru_cache`` wrapped around :func:`load_provider_mapping`,
    so the next call for any provider re-reads and re-validates its YAML.
    """
    load_provider_mapping.cache_clear()
# Public API of this module. The pydantic file-schema models other than
# ``ExternalReference`` are not listed here.
__all__ = [
    "ExternalReference",
    "ProviderMapping",
    "Signal",
    "TechniqueEmission",
    "clear_cache",
    "load_provider_mapping",
]