diff --git a/decnet/intel/__init__.py b/decnet/intel/__init__.py
new file mode 100644
index 00000000..d3574dbe
--- /dev/null
+++ b/decnet/intel/__init__.py
@@ -0,0 +1,10 @@
+"""Threat-intel enrichment subsystem — out-of-band lookups for attacker IPs.
+
+Sibling to :mod:`decnet.geoip` and :mod:`decnet.asn`, but runs as a
+separate worker (``decnet enrich``) rather than inline in the profiler:
+3rd-party HTTP latency and free-tier rate limits should not block the
+profiler tick.
+
+Public surface: :func:`decnet.intel.factory.get_intel_providers` and the
+:class:`decnet.intel.base.IntelProvider` ABC.
+"""
diff --git a/decnet/intel/base.py b/decnet/intel/base.py
new file mode 100644
index 00000000..665aff4e
--- /dev/null
+++ b/decnet/intel/base.py
@@ -0,0 +1,87 @@
+"""Threat-intel provider protocol.
+
+Each concrete provider (:mod:`decnet.intel.greynoise`,
+:mod:`decnet.intel.abuseipdb`, :mod:`decnet.intel.feodo`,
+:mod:`decnet.intel.threatfox`) implements this. Callers must obtain
+providers via :func:`decnet.intel.factory.get_intel_providers` — never
+instantiate a concrete provider class directly.
+
+Unlike :mod:`decnet.geoip` (which returns a single ``Provider``), the
+intel subsystem returns a **list** of providers — enrichment fans out
+across all of them per IP, and partial successes are stored row-wise.
+"""
+from __future__ import annotations
+
+import asyncio
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from typing import Any, Optional
+
+
+@dataclass
+class IntelResult:
+    """Per-provider enrichment outcome.
+
+    The worker maps these into the per-provider columns on
+    ``attacker_intel`` (e.g. ``greynoise_classification`` /
+    ``greynoise_raw`` / ``greynoise_queried_at``).
+
+    ``column_updates`` carries the dialect-portable column→value map the
+    repository ``upsert_attacker_intel`` will apply. There is no separate
+    ``raw`` attribute: the serialized provider response presumably rides
+    in ``column_updates`` under the provider's ``*_raw`` column, already
+    JSON-encoded by the provider so the worker doesn't need to know the
+    wire shape (NOTE(review): confirm against the concrete providers).
+    """
+
+    provider: str
+    """Short tag — matches the column prefix in ``attacker_intel``
+    (``greynoise``, ``abuseipdb``, ``feodo``, ``threatfox``)."""
+
+    column_updates: dict[str, Any] = field(default_factory=dict)
+    """Columns to write on the ``attacker_intel`` row."""
+
+    verdict: Optional[str] = None
+    """Provider-local verdict label, e.g. ``"malicious"`` / ``"benign"``.
+    Used by the worker to compute ``aggregate_verdict``. ``None`` =
+    "no opinion" (e.g. IP not present in a blocklist)."""
+
+    error: Optional[str] = None
+    """Populated when the provider call failed. The worker logs it and
+    leaves the row unchanged for this provider so a partial-success
+    enrichment doesn't clobber a previous good answer."""
+
+
+class IntelProvider(ABC):
+    """Abstract threat-intel provider."""
+
+    #: Short tag — matches ``IntelResult.provider`` and the column prefix
+    #: on ``attacker_intel``.
+    name: str
+
+    #: Per-provider in-flight cap. Free tiers are surprisingly tight
+    #: (GreyNoise community ~50/min); 4 is a safe default but providers
+    #: can override.
+    concurrency: int = 4
+
+    #: Minimum seconds between dispatches. Token-bucket-lite — see
+    #: :class:`decnet.intel.worker.RateLimitedDispatcher`.
+    min_dispatch_interval_s: float = 0.0
+
+    def __init__(self) -> None:
+        """Create the semaphore that caps in-flight lookups.
+
+        Subclasses that override ``__init__`` must call
+        ``super().__init__()``, otherwise ``self._semaphore`` is never set.
+        """
+        self._semaphore = asyncio.Semaphore(self.concurrency)
+
+    @abstractmethod
+    async def lookup(self, ip: str) -> IntelResult:
+        """Query the provider for ``ip`` and return the result.
+
+        MUST NOT raise — capture errors in ``IntelResult.error`` so a
+        single provider's outage doesn't break the worker pass for an
+        entire IP. Implementations should also respect
+        ``self._semaphore`` to bound in-flight calls.
+        """
diff --git a/decnet/intel/factory.py b/decnet/intel/factory.py
new file mode 100644
index 00000000..9f130f8f
--- /dev/null
+++ b/decnet/intel/factory.py
@@ -0,0 +1,96 @@
+"""Threat-intel provider factory.
+
+Returns the **list** of configured :class:`IntelProvider` instances —
+diverges from :mod:`decnet.geoip.factory` (which returns a single
+provider) because intel enrichment fans out across every enabled
+provider per IP, with partial-success handling per row.
+
+Configuration knobs (env-overridable; INI-driven defaults via
+``decnet/config_ini.py``):
+
+* ``DECNET_INTEL_ENABLED`` — master kill-switch (default ``true``).
+  ``false``, ``0``, ``no`` and ``off`` (any case) disable enrichment.
+* ``DECNET_INTEL_PROVIDERS`` — comma-separated list. Default
+  ``"greynoise,abuseipdb,feodo,threatfox"``.
+
+Per-provider keys (``DECNET_GREYNOISE_API_KEY``,
+``DECNET_ABUSEIPDB_API_KEY``, ``DECNET_THREATFOX_API_KEY``) are read by
+each concrete provider; the factory just instantiates and returns.
+"""
+from __future__ import annotations
+
+import importlib
+import os
+from typing import List
+
+from decnet.intel.base import IntelProvider
+
+# Provider tag -> (module path, class name). Modules are imported lazily
+# by ``get_intel_providers`` so only the configured providers are loaded.
+_PROVIDER_CLASSES = {
+    "greynoise": ("decnet.intel.greynoise", "GreyNoiseProvider"),
+    "abuseipdb": ("decnet.intel.abuseipdb", "AbuseIPDBProvider"),
+    "feodo": ("decnet.intel.feodo", "FeodoProvider"),
+    "threatfox": ("decnet.intel.threatfox", "ThreatFoxProvider"),
+}
+
+_KNOWN_PROVIDERS = tuple(_PROVIDER_CLASSES)
+
+# ``DECNET_INTEL_ENABLED`` values (lower-cased, stripped) that disable intel.
+_DISABLED_VALUES = frozenset({"false", "0", "no", "off"})
+
+
+def _enabled() -> bool:
+    """Return ``False`` only when the kill-switch holds a disabling value.
+
+    Unset, empty, or unrecognised values leave enrichment enabled, which
+    matches the documented default of ``true``.
+    """
+    flag = os.environ.get("DECNET_INTEL_ENABLED", "true")
+    return flag.strip().lower() not in _DISABLED_VALUES
+
+
+def _provider_list() -> list[str]:
+    """Return the configured provider tags, normalised and de-duplicated.
+
+    Tags are stripped and lower-cased, empty entries are dropped, and a
+    tag listed twice is kept once (first position wins) so a provider is
+    never instantiated twice.
+    """
+    raw = os.environ.get(
+        "DECNET_INTEL_PROVIDERS", ",".join(_KNOWN_PROVIDERS),
+    )
+    tags = (part.strip().lower() for part in raw.split(","))
+    # dict.fromkeys de-duplicates while preserving first-seen order.
+    return list(dict.fromkeys(tag for tag in tags if tag))
+
+
+def get_intel_providers() -> List[IntelProvider]:
+    """Return the configured threat-intel providers.
+
+    Returns ``[]`` when intel is disabled or the configured list is
+    empty — the worker treats that as "stay running but never make a
+    call," which is the right behavior for an operator who wants the
+    table maintained but no egress.
+
+    Unknown provider names raise :class:`ValueError` so a typo in
+    ``decnet.ini`` surfaces immediately rather than silently dropping a
+    provider. Every name is checked before any provider module is
+    imported, so a typo is reported even when it follows a valid name.
+    """
+    if not _enabled():
+        return []
+
+    names = _provider_list()
+    for name in names:
+        if name not in _PROVIDER_CLASSES:
+            raise ValueError(
+                f"Unknown intel provider: {name!r}. Known: {_KNOWN_PROVIDERS}"
+            )
+
+    providers: List[IntelProvider] = []
+    for name in names:
+        module_path, class_name = _PROVIDER_CLASSES[name]
+        module = importlib.import_module(module_path)
+        providers.append(getattr(module, class_name)())
+    return providers
diff --git a/tests/intel/test_factory.py b/tests/intel/test_factory.py
new file mode 100644
index 00000000..b3a12279
--- /dev/null
+++ b/tests/intel/test_factory.py
@@ -0,0 +1,69 @@
+"""Tests for the intel provider factory.
+
+The factory returns a **list** of configured providers (not a singleton
+like :mod:`decnet.geoip.factory`). Coverage:
+
+* disabled master switch returns ``[]`` (every accepted "off" spelling)
+* empty provider list returns ``[]``
+* unknown provider name raises ``ValueError`` (typo guard), even when it
+  follows a valid name
+* trimming + case-insensitivity of the providers env var
+"""
+from __future__ import annotations
+
+import pytest
+
+from decnet.intel.factory import get_intel_providers
+
+
+@pytest.fixture(autouse=True)
+def _isolate_env(monkeypatch):
+    # Disable real providers — concrete impls land in later commits, but
+    # the factory tests should pass against whatever subset exists today
+    # via empty/unknown lists.
+    for key in (
+        "DECNET_INTEL_ENABLED",
+        "DECNET_INTEL_PROVIDERS",
+        "DECNET_GREYNOISE_API_KEY",
+        "DECNET_ABUSEIPDB_API_KEY",
+        "DECNET_THREATFOX_API_KEY",
+    ):
+        monkeypatch.delenv(key, raising=False)
+
+
+@pytest.mark.parametrize(
+    "value", ["false", "FALSE", " false ", "0", "no", "off"],
+)
+def test_disabled_returns_empty(monkeypatch, value):
+    monkeypatch.setenv("DECNET_INTEL_ENABLED", value)
+    monkeypatch.setenv("DECNET_INTEL_PROVIDERS", "greynoise")
+    assert get_intel_providers() == []
+
+
+def test_empty_provider_list_returns_empty(monkeypatch):
+    monkeypatch.setenv("DECNET_INTEL_PROVIDERS", "")
+    assert get_intel_providers() == []
+
+
+def test_unknown_provider_name_raises(monkeypatch):
+    monkeypatch.setenv("DECNET_INTEL_PROVIDERS", "definitely-not-real")
+    with pytest.raises(ValueError, match="Unknown intel provider"):
+        get_intel_providers()
+
+
+def test_unknown_name_rejected_before_providers_load(monkeypatch):
+    # Names are validated up front, so the typo is reported even though a
+    # valid name (whose module may not exist yet) precedes it.
+    monkeypatch.setenv("DECNET_INTEL_PROVIDERS", "greynoise,not-real")
+    with pytest.raises(ValueError, match="not-real"):
+        get_intel_providers()
+
+
+def test_whitespace_and_case_normalised(monkeypatch):
+    # The factory imports concrete provider modules lazily; this test only
+    # asserts that case+whitespace normalization doesn't trip the lookup.
+    # We use an unknown name (which would also be unknown if not lowercased)
+    # to exercise the path without requiring provider impls to exist yet.
+    monkeypatch.setenv("DECNET_INTEL_PROVIDERS", " Mystery , ")
+    with pytest.raises(ValueError, match="mystery"):
+        get_intel_providers()