feat(intel): provider ABC + lazy factory
IntelProvider is async-first (every concrete provider does HTTP), bounded by a per-provider asyncio.Semaphore, and contractually never raises — errors land in IntelResult.error so a single provider's outage doesn't poison the worker pass for an entire IP. Factory returns a list (not a singleton like geoip) because intel enrichment fans out across all enabled providers per IP, with row-level partial-success handling. Lazy imports keep the module dependency-free when intel is disabled. Concrete providers (greynoise/abuseipdb/feodo/threatfox) land in follow-up commits — factory references them via lazy import so tests covering the disabled and unknown-name paths pass on their own.
This commit is contained in:
10
decnet/intel/__init__.py
Normal file
10
decnet/intel/__init__.py
Normal file
@@ -0,0 +1,10 @@
|
||||
"""Threat-intel enrichment subsystem — out-of-band lookups for attacker IPs.
|
||||
|
||||
Sibling to :mod:`decnet.geoip` and :mod:`decnet.asn`, but runs as a
|
||||
separate worker (``decnet enrich``) rather than inline in the profiler:
|
||||
3rd-party HTTP latency and free-tier rate limits should not block the
|
||||
profiler tick.
|
||||
|
||||
Public surface: :func:`decnet.intel.factory.get_intel_providers` and the
|
||||
:class:`decnet.intel.base.IntelProvider` ABC.
|
||||
"""
|
||||
80
decnet/intel/base.py
Normal file
80
decnet/intel/base.py
Normal file
@@ -0,0 +1,80 @@
|
||||
"""Threat-intel provider protocol.
|
||||
|
||||
Each concrete provider (:mod:`decnet.intel.greynoise`,
|
||||
:mod:`decnet.intel.abuseipdb`, :mod:`decnet.intel.feodo`,
|
||||
:mod:`decnet.intel.threatfox`) implements this. Callers must obtain
|
||||
providers via :func:`decnet.intel.factory.get_intel_providers` — never
|
||||
instantiate a concrete provider class directly.
|
||||
|
||||
Unlike :mod:`decnet.geoip` (which returns a single ``Provider``), the
|
||||
intel subsystem returns a **list** of providers — enrichment fans out
|
||||
across all of them per IP, and partial successes are stored row-wise.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Optional
|
||||
|
||||
|
||||
@dataclass
class IntelResult:
    """Outcome of one provider's lookup for a single IP.

    The enrichment worker translates an instance into the per-provider
    columns of ``attacker_intel`` (for example
    ``greynoise_classification``, ``greynoise_raw`` and
    ``greynoise_queried_at``).

    Attributes:
        provider: Short tag identifying the provider. It equals the
            column prefix used in ``attacker_intel`` (``greynoise``,
            ``abuseipdb``, ``feodo``, ``threatfox``).
        column_updates: Dialect-portable column→value map that the
            repository's ``upsert_attacker_intel`` applies to the
            ``attacker_intel`` row. The raw provider response is
            JSON-encoded by the provider before it is handed over, so
            the worker never needs to understand the wire shape.
        verdict: Provider-local verdict label such as ``"malicious"`` or
            ``"benign"``; the worker combines these into
            ``aggregate_verdict``. ``None`` means "no opinion" (e.g. the
            IP is simply absent from a blocklist).
        error: Set when the provider call failed. The worker logs it and
            leaves this provider's columns untouched, so a
            partial-success pass cannot clobber an earlier good answer.
    """

    provider: str
    column_updates: dict[str, Any] = field(default_factory=dict)
    verdict: Optional[str] = None
    error: Optional[str] = None
|
||||
|
||||
|
||||
class IntelProvider(ABC):
    """Base class that every threat-intel provider derives from.

    Class attributes (subclasses set or override these):

    * ``name`` — short tag; matches ``IntelResult.provider`` and the
      column prefix on ``attacker_intel``.
    * ``concurrency`` — per-provider cap on in-flight calls. Free tiers
      are surprisingly tight (GreyNoise community is roughly 50/min), so
      the default of 4 is deliberately conservative.
    * ``min_dispatch_interval_s`` — minimum number of seconds between
      dispatches (a lightweight token bucket); see
      :class:`decnet.intel.worker.RateLimitedDispatcher`.
    """

    name: str
    concurrency: int = 4
    min_dispatch_interval_s: float = 0.0

    def __init__(self) -> None:
        # Sized from the (possibly overridden) class attribute, so every
        # instance carries its own in-flight limiter.
        in_flight_limit = self.concurrency
        self._semaphore = asyncio.Semaphore(in_flight_limit)

    @abstractmethod
    async def lookup(self, ip: str) -> IntelResult:
        """Look ``ip`` up at this provider and report the outcome.

        Implementations MUST NOT raise: failures belong in
        ``IntelResult.error`` so that one provider's outage cannot abort
        the worker pass for the whole IP. Implementations should also
        acquire ``self._semaphore`` around the call to keep the number
        of in-flight requests bounded.
        """
|
||||
73
decnet/intel/factory.py
Normal file
73
decnet/intel/factory.py
Normal file
@@ -0,0 +1,73 @@
|
||||
"""Threat-intel provider factory.
|
||||
|
||||
Returns the **list** of configured :class:`IntelProvider` instances —
|
||||
diverges from :mod:`decnet.geoip.factory` (which returns a single
|
||||
provider) because intel enrichment fans out across every enabled
|
||||
provider per IP, with partial-success handling per row.
|
||||
|
||||
Configuration knobs (env-overridable; INI-driven defaults via
|
||||
``decnet/config_ini.py``):
|
||||
|
||||
* ``DECNET_INTEL_ENABLED`` — master kill-switch (default ``true``).
|
||||
* ``DECNET_INTEL_PROVIDERS`` — comma-separated list. Default
|
||||
``"greynoise,abuseipdb,feodo,threatfox"``.
|
||||
|
||||
Per-provider keys (``DECNET_GREYNOISE_API_KEY``,
|
||||
``DECNET_ABUSEIPDB_API_KEY``, ``DECNET_THREATFOX_API_KEY``) are read by
|
||||
each concrete provider; the factory just instantiates and returns.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import List
|
||||
|
||||
from decnet.intel.base import IntelProvider
|
||||
|
||||
# Provider names this factory recognises. Also the default value of
# ``DECNET_INTEL_PROVIDERS`` and the list echoed in the unknown-provider error.
_KNOWN_PROVIDERS = ("greynoise", "abuseipdb", "feodo", "threatfox")
|
||||
|
||||
|
||||
def _enabled() -> bool:
    """Report whether intel enrichment is switched on.

    Reads ``DECNET_INTEL_ENABLED`` from the environment. The subsystem
    is on unless the variable holds an explicit "off" spelling —
    ``false``, ``0``, ``no`` or ``off`` — compared case-insensitively
    with surrounding whitespace ignored. An unset variable, an empty
    value, or any other value keeps intel enabled.

    Returns:
        ``False`` only for an explicit "off" value; ``True`` otherwise.
    """
    # Normalise before comparing: a stray space or one of the common
    # "0" / "no" / "off" spellings must not leave a kill-switch (and
    # therefore third-party egress) silently enabled.
    value = os.environ.get("DECNET_INTEL_ENABLED", "true").strip().lower()
    return value not in ("false", "0", "no", "off")
|
||||
|
||||
|
||||
def _provider_list() -> list[str]:
    """Return the configured provider names, normalised.

    The value of ``DECNET_INTEL_PROVIDERS`` (falling back to every name
    in ``_KNOWN_PROVIDERS``) is split on commas; each entry is trimmed
    and lower-cased, and entries that are empty after trimming are
    dropped. Order and duplicates are preserved.
    """
    fallback = ",".join(_KNOWN_PROVIDERS)
    configured = os.environ.get("DECNET_INTEL_PROVIDERS", fallback)

    names: list[str] = []
    for entry in configured.split(","):
        trimmed = entry.strip()
        if trimmed:
            names.append(trimmed.lower())
    return names
|
||||
|
||||
|
||||
def get_intel_providers() -> List[IntelProvider]:
    """Return the configured threat-intel providers.

    Returns ``[]`` when intel is disabled or the configured list is
    empty — the worker treats that as "stay running but never make a
    call," which is the right behavior for an operator who wants the
    table maintained but no egress.

    The configured names are de-duplicated (first occurrence wins, order
    preserved) so a repeated entry cannot create two instances of the
    same provider and double the calls made against its rate limit.

    The whole list is validated *before* any provider module is imported
    or instantiated, so a typo is reported as a :class:`ValueError` even
    when it follows valid names, and no provider is half-constructed on
    the way to that error.

    Returns:
        One freshly constructed provider per distinct configured name,
        in configuration order.

    Raises:
        ValueError: If any configured name is not a known provider.
    """
    if not _enabled():
        return []

    # dict.fromkeys keeps insertion order, giving an order-preserving dedupe.
    names = list(dict.fromkeys(_provider_list()))

    # Fail fast on typos before touching any concrete provider module.
    for name in names:
        if name not in _KNOWN_PROVIDERS:
            raise ValueError(
                f"Unknown intel provider: {name!r}. Known: {_KNOWN_PROVIDERS}"
            )

    providers: List[IntelProvider] = []
    for name in names:
        # Imports stay inside the branches so that a provider's module is
        # only loaded when that provider is actually enabled.
        if name == "greynoise":
            from decnet.intel.greynoise import GreyNoiseProvider

            providers.append(GreyNoiseProvider())
        elif name == "abuseipdb":
            from decnet.intel.abuseipdb import AbuseIPDBProvider

            providers.append(AbuseIPDBProvider())
        elif name == "feodo":
            from decnet.intel.feodo import FeodoProvider

            providers.append(FeodoProvider())
        elif name == "threatfox":
            from decnet.intel.threatfox import ThreatFoxProvider

            providers.append(ThreatFoxProvider())
        else:
            # Only reachable if _KNOWN_PROVIDERS gains a name without a
            # matching branch above; refuse rather than silently drop it.
            raise ValueError(
                f"Unknown intel provider: {name!r}. Known: {_KNOWN_PROVIDERS}"
            )
    return providers
|
||||
57
tests/intel/test_factory.py
Normal file
57
tests/intel/test_factory.py
Normal file
@@ -0,0 +1,57 @@
|
||||
"""Tests for the intel provider factory.
|
||||
|
||||
The factory returns a **list** of configured providers (not a singleton
|
||||
like :mod:`decnet.geoip.factory`). Coverage:
|
||||
|
||||
* disabled master switch returns ``[]``
|
||||
* empty provider list returns ``[]``
|
||||
* unknown provider name raises ``ValueError`` (typo guard)
|
||||
* trimming + case-insensitivity of the providers env var
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.intel.factory import get_intel_providers
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _isolate_env(monkeypatch):
    """Start every test with none of the intel environment variables set.

    Concrete provider implementations land in later commits, so these
    factory tests must pass against whatever subset exists today; they
    do so by only exercising the empty and unknown-name paths.
    """
    intel_env_vars = (
        "DECNET_INTEL_ENABLED",
        "DECNET_INTEL_PROVIDERS",
        "DECNET_GREYNOISE_API_KEY",
        "DECNET_ABUSEIPDB_API_KEY",
        "DECNET_THREATFOX_API_KEY",
    )
    for var_name in intel_env_vars:
        # raising=False: the variable is usually absent already.
        monkeypatch.delenv(var_name, raising=False)
|
||||
|
||||
|
||||
def test_disabled_returns_empty(monkeypatch):
    """The master switch wins even when a provider is configured."""
    monkeypatch.setenv("DECNET_INTEL_PROVIDERS", "greynoise")
    monkeypatch.setenv("DECNET_INTEL_ENABLED", "false")

    providers = get_intel_providers()

    assert providers == []
|
||||
|
||||
|
||||
def test_empty_provider_list_returns_empty(monkeypatch):
    """An explicitly empty provider list yields no providers."""
    monkeypatch.setenv("DECNET_INTEL_PROVIDERS", "")

    providers = get_intel_providers()

    assert providers == []
|
||||
|
||||
|
||||
def test_unknown_provider_name_raises(monkeypatch):
    """A name the factory does not recognise is rejected (typo guard)."""
    monkeypatch.setenv("DECNET_INTEL_PROVIDERS", "definitely-not-real")

    expect_unknown = pytest.raises(ValueError, match="Unknown intel provider")
    with expect_unknown:
        get_intel_providers()
|
||||
|
||||
|
||||
def test_whitespace_and_case_normalised(monkeypatch):
    """Provider names are trimmed and lower-cased before lookup.

    The factory imports concrete provider modules lazily, and those
    modules may not exist yet, so an unknown name is used to exercise
    the normalisation path without needing any provider implementation.
    """
    monkeypatch.setenv("DECNET_INTEL_PROVIDERS", " Mystery , ")
    # Match the quoted repr from the error message rather than the bare
    # word: a bare "mystery" would also match an untrimmed ' mystery ',
    # which would leave the whitespace half of this test unverified.
    with pytest.raises(ValueError, match=r"'mystery'"):
        get_intel_providers()
|
||||
Reference in New Issue
Block a user