feat(intel): provider ABC + lazy factory

IntelProvider is async-first (every concrete provider does HTTP), bounded
by a per-provider asyncio.Semaphore, and contractually never raises —
errors land in IntelResult.error so a single provider's outage doesn't
poison the worker pass for an entire IP.

Factory returns a list (not a singleton like geoip) because intel
enrichment fans out across all enabled providers per IP, with row-level
partial-success handling. Lazy imports keep the module dependency-free
when intel is disabled.

Concrete providers (greynoise/abuseipdb/feodo/threatfox) land in
follow-up commits — factory references them via lazy import so tests
covering the disabled and unknown-name paths pass on their own.
This commit is contained in:
2026-04-26 04:58:38 -04:00
parent 0dd3811436
commit 023bc1993d
4 changed files with 220 additions and 0 deletions

10
decnet/intel/__init__.py Normal file
View File

@@ -0,0 +1,10 @@
"""Threat-intel enrichment subsystem — out-of-band lookups for attacker IPs.
Sibling to :mod:`decnet.geoip` and :mod:`decnet.asn`, but runs as a
separate worker (``decnet enrich``) rather than inline in the profiler:
3rd-party HTTP latency and free-tier rate limits should not block the
profiler tick.
Public surface: :func:`decnet.intel.factory.get_intel_providers` and the
:class:`decnet.intel.base.IntelProvider` ABC.
"""

80
decnet/intel/base.py Normal file
View File

@@ -0,0 +1,80 @@
"""Threat-intel provider protocol.
Each concrete provider (:mod:`decnet.intel.greynoise`,
:mod:`decnet.intel.abuseipdb`, :mod:`decnet.intel.feodo`,
:mod:`decnet.intel.threatfox`) implements this. Callers must obtain
providers via :func:`decnet.intel.factory.get_intel_providers` — never
instantiate a concrete provider class directly.
Unlike :mod:`decnet.geoip` (which returns a single ``Provider``), the
intel subsystem returns a **list** of providers — enrichment fans out
across all of them per IP, and partial successes are stored row-wise.
"""
from __future__ import annotations
import asyncio
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any, Optional
@dataclass
class IntelResult:
    """Outcome of one provider's lookup for a single IP.

    The enrichment worker translates an instance into the provider-specific
    columns of ``attacker_intel`` (for example ``greynoise_classification``,
    ``greynoise_raw`` and ``greynoise_queried_at``).

    NOTE(review): this class has no dedicated ``raw`` attribute; the
    JSON-encoded provider response presumably travels inside
    ``column_updates`` (e.g. under ``<provider>_raw``) — confirm against the
    concrete providers.
    """

    #: Short provider tag; doubles as the column prefix in ``attacker_intel``
    #: (``greynoise``, ``abuseipdb``, ``feodo``, ``threatfox``).
    provider: str

    #: Dialect-portable column -> value map that the repository's
    #: ``upsert_attacker_intel`` applies to the ``attacker_intel`` row.
    column_updates: dict[str, Any] = field(default_factory=dict)

    #: Provider-local verdict label such as ``"malicious"`` or ``"benign"``;
    #: feeds the worker's ``aggregate_verdict``. ``None`` means "no opinion"
    #: (e.g. the IP is not present in a blocklist).
    verdict: Optional[str] = None

    #: Set when the provider call failed. The worker logs it and leaves this
    #: provider's part of the row unchanged, so a partial-success enrichment
    #: cannot clobber a previous good answer.
    error: Optional[str] = None
class IntelProvider(ABC):
    """Base class that every threat-intel provider derives from."""

    #: Provider tag; equals ``IntelResult.provider`` and prefixes the
    #: provider's columns on ``attacker_intel``.
    name: str

    #: Upper bound on simultaneous in-flight lookups for this provider.
    #: Free tiers are surprisingly tight (GreyNoise community ~50/min), so
    #: the default is a conservative 4; subclasses may override it.
    concurrency: int = 4

    #: Minimum gap, in seconds, between dispatches — token-bucket-lite; see
    #: :class:`decnet.intel.worker.RateLimitedDispatcher`.
    min_dispatch_interval_s: float = 0.0

    def __init__(self) -> None:
        # Sized from the class attribute, so a subclass override of
        # ``concurrency`` is picked up here.
        in_flight_cap = self.concurrency
        self._semaphore = asyncio.Semaphore(in_flight_cap)

    @abstractmethod
    async def lookup(self, ip: str) -> IntelResult:
        """Look up ``ip`` with this provider and report the outcome.

        Implementations MUST NOT raise: failures are reported through
        ``IntelResult.error`` so that one provider's outage does not break
        the worker pass for the whole IP. Implementations should also hold
        ``self._semaphore`` while calling out, to bound in-flight requests.
        """

73
decnet/intel/factory.py Normal file
View File

@@ -0,0 +1,73 @@
"""Threat-intel provider factory.
Returns the **list** of configured :class:`IntelProvider` instances —
diverges from :mod:`decnet.geoip.factory` (which returns a single
provider) because intel enrichment fans out across every enabled
provider per IP, with partial-success handling per row.
Configuration knobs (env-overridable; INI-driven defaults via
``decnet/config_ini.py``):
* ``DECNET_INTEL_ENABLED`` — master kill-switch (default ``true``).
* ``DECNET_INTEL_PROVIDERS`` — comma-separated list. Default
``"greynoise,abuseipdb,feodo,threatfox"``.
Per-provider keys (``DECNET_GREYNOISE_API_KEY``,
``DECNET_ABUSEIPDB_API_KEY``, ``DECNET_THREATFOX_API_KEY``) are read by
each concrete provider; the factory just instantiates and returns.
"""
from __future__ import annotations
import os
from typing import List
from decnet.intel.base import IntelProvider
# Provider tags the factory knows how to build. Also used as the default
# value of ``DECNET_INTEL_PROVIDERS`` and quoted in the "unknown provider"
# error message.
_KNOWN_PROVIDERS = ("greynoise", "abuseipdb", "feodo", "threatfox")
def _enabled() -> bool:
    """Return whether intel enrichment is switched on.

    Reads ``DECNET_INTEL_ENABLED`` (default ``"true"``). The value is
    stripped and compared case-insensitively; ``false``, ``0``, ``no`` and
    ``off`` disable the subsystem, anything else (including an empty
    string) leaves it enabled.

    This is the master kill-switch for third-party egress, so the common
    "off" spellings are all honoured: an operator who sets ``0`` or
    ``"false "`` must not be left with lookups still going out.
    """
    flag = os.environ.get("DECNET_INTEL_ENABLED", "true").strip().lower()
    return flag not in ("false", "0", "no", "off")
def _provider_list() -> list[str]:
    """Return the configured provider tags, normalised and de-duplicated.

    Reads the comma-separated ``DECNET_INTEL_PROVIDERS``; when the variable
    is unset, every entry of ``_KNOWN_PROVIDERS`` is used. Each entry is
    stripped and lower-cased, and empty entries (``"a,,b"``, trailing
    commas, an empty string) are dropped.

    Repeated names are collapsed to their first occurrence. Without this,
    a list such as ``"greynoise,GreyNoise"`` would build two instances of
    the same provider and double the calls made against its rate limit.
    """
    raw = os.environ.get("DECNET_INTEL_PROVIDERS")
    if raw is None:
        # Only build the default when it is actually needed.
        raw = ",".join(_KNOWN_PROVIDERS)
    names = (part.strip().lower() for part in raw.split(","))
    # dict.fromkeys keeps first-seen order while discarding duplicates.
    return list(dict.fromkeys(name for name in names if name))
def get_intel_providers() -> List[IntelProvider]:
    """Build and return the configured threat-intel providers.

    Returns:
        One provider instance per configured name, in configuration order.
        ``[]`` when intel is disabled or the configured list is empty — the
        worker treats that as "stay running but never make a call", which
        suits an operator who wants the table maintained but no egress.

    Raises:
        ValueError: if any configured name is not a known provider, so a
            typo in ``decnet.ini`` surfaces immediately instead of silently
            dropping a provider. The whole list is checked before any
            provider module is imported or instantiated.
    """
    if not _enabled():
        return []

    names = _provider_list()

    # Validate everything first. Otherwise a typo placed after a valid name
    # would only be reported after the earlier providers had already been
    # imported and constructed, and an import failure in one of them would
    # mask the typo entirely.
    for name in names:
        if name not in _KNOWN_PROVIDERS:
            raise ValueError(
                f"Unknown intel provider: {name!r}. Known: {_KNOWN_PROVIDERS}"
            )

    providers: List[IntelProvider] = []
    for name in names:
        # Imports stay inside the branches so that a provider module (and
        # whatever it depends on) is only loaded when it is configured.
        if name == "greynoise":
            from decnet.intel.greynoise import GreyNoiseProvider

            providers.append(GreyNoiseProvider())
        elif name == "abuseipdb":
            from decnet.intel.abuseipdb import AbuseIPDBProvider

            providers.append(AbuseIPDBProvider())
        elif name == "feodo":
            from decnet.intel.feodo import FeodoProvider

            providers.append(FeodoProvider())
        elif name == "threatfox":
            from decnet.intel.threatfox import ThreatFoxProvider

            providers.append(ThreatFoxProvider())
        else:
            # Only reachable if _KNOWN_PROVIDERS gains a name that has no
            # branch above; fail loudly rather than skip it.
            raise ValueError(
                f"Unknown intel provider: {name!r}. Known: {_KNOWN_PROVIDERS}"
            )
    return providers

View File

@@ -0,0 +1,57 @@
"""Tests for the intel provider factory.
The factory returns a **list** of configured providers (not a singleton
like :mod:`decnet.geoip.factory`). Coverage:
* disabled master switch returns ``[]``
* empty provider list returns ``[]``
* unknown provider name raises ``ValueError`` (typo guard)
* trimming + case-insensitivity of the providers env var
"""
from __future__ import annotations
import pytest
from decnet.intel.factory import get_intel_providers
@pytest.fixture(autouse=True)
def _isolate_env(monkeypatch):
    """Clear every intel-related environment variable before each test.

    Concrete provider implementations land in later commits, so these tests
    must pass against whatever subset exists today; they only exercise the
    empty-list and unknown-name paths and start from a clean environment.
    """
    intel_env_vars = (
        "DECNET_INTEL_ENABLED",
        "DECNET_INTEL_PROVIDERS",
        "DECNET_GREYNOISE_API_KEY",
        "DECNET_ABUSEIPDB_API_KEY",
        "DECNET_THREATFOX_API_KEY",
    )
    for var_name in intel_env_vars:
        # raising=False: the variable may legitimately be absent already.
        monkeypatch.delenv(var_name, raising=False)
def test_disabled_returns_empty(monkeypatch):
    """The master switch wins even when a provider is configured."""
    overrides = {
        "DECNET_INTEL_ENABLED": "false",
        "DECNET_INTEL_PROVIDERS": "greynoise",
    }
    for var_name, value in overrides.items():
        monkeypatch.setenv(var_name, value)

    providers = get_intel_providers()

    assert providers == []
def test_empty_provider_list_returns_empty(monkeypatch):
    """An explicitly empty provider list yields no providers."""
    monkeypatch.setenv("DECNET_INTEL_PROVIDERS", "")

    configured = get_intel_providers()

    assert configured == []
def test_unknown_provider_name_raises(monkeypatch):
    """A name the factory does not recognise is rejected (typo guard)."""
    monkeypatch.setenv("DECNET_INTEL_PROVIDERS", "definitely-not-real")

    expect_typo_error = pytest.raises(
        ValueError, match="Unknown intel provider"
    )
    with expect_typo_error:
        get_intel_providers()
def test_whitespace_and_case_normalised(monkeypatch):
    """Provider names are trimmed and lower-cased before lookup.

    The factory imports concrete provider modules lazily, and those modules
    need not exist yet. An unknown name is therefore used: it reaches the
    error path without importing anything, and the lower-case ``mystery``
    in the message shows that normalisation ran on `` Mystery ``.
    """
    monkeypatch.setenv("DECNET_INTEL_PROVIDERS", " Mystery , ")

    expect_normalised_name = pytest.raises(ValueError, match="mystery")
    with expect_normalised_name:
        get_intel_providers()