Files
DECNET/decnet/geoip/__init__.py
anti ffc275f051 feat(geoip): country-code enrichment via RIR delegated-stats
Populates Attacker.country_code + country_source (MVP) using the five
RIR delegated-stats files (ARIN/RIPE/APNIC/LACNIC/AFRINIC). Offline,
license-free, no outbound traffic that could burn honeypot stealth.

- decnet.geoip package with factory/base/lookup + rir/ subpackage
  (fetch/parse/provider) mirroring the db + bus factory convention
- Profiler._build_record calls enrich_ip on every upsert
- Idempotent ALTER TABLE migrations for both SQLite and MySQL
- decnet geoip refresh/lookup CLI (master-only)
- /var/lib/decnet/geoip seeded by decnet init
- DECNET_GEOIP_ENABLED=false kill-switch; set in tests/conftest.py so
  unit tests never trigger the first-access fetch
2026-04-23 21:12:38 -04:00

96 lines
3.2 KiB
Python

"""
GeoIP enrichment — maps attacker IPs to country codes for attacker intelligence.
Public surface:
* :func:`get_lookup` — returns the singleton :class:`~decnet.geoip.lookup.Lookup`.
Builds / loads the index on first call. Refreshes the underlying data files
if they're missing or older than :data:`REFRESH_INTERVAL_S`.
* :func:`enrich_ip` — convenience wrapper used by the profiler: takes an IP
string, returns ``(country_code, provider_name)`` or ``(None, None)``.
Provider selection goes through :func:`~decnet.geoip.factory.get_provider`
(env ``DECNET_GEOIP_PROVIDER``, default ``rir``). Direct imports of concrete
providers are forbidden — mirrors the ``get_bus`` / ``get_repository`` rule.
"""
from __future__ import annotations

import logging
import os
import time
from typing import Optional, Tuple

from decnet.geoip.factory import get_provider
from decnet.geoip.lookup import Lookup
from decnet.geoip.paths import GEOIP_ROOT
# 24 h — delegated-stats files are refreshed daily by the RIRs.
REFRESH_INTERVAL_S = 86_400
# Process-wide cache of the built index; stays ``None`` until get_lookup()
# builds it, and is reset to ``None`` whenever a refresh is performed.
_lookup: Optional[Lookup] = None
# Provider name recorded by get_lookup(); enrich_ip() returns it as the
# source label next to the country code.
_provider_name: Optional[str] = None
def get_lookup(*, force_refresh: bool = False) -> Lookup:
    """Return the cached :class:`Lookup`, building it on first use.

    The provider is resolved through :func:`get_provider` on every call.
    If its data files are missing or older than ``REFRESH_INTERVAL_S``
    seconds (see :func:`_files_stale`), ``provider.refresh()`` runs and the
    index is rebuilt before this function returns. Pass
    ``force_refresh=True`` to bypass the age check (used by
    ``decnet geoip refresh``).

    The cached index is tied to the provider that built it: when the
    resolved provider's name differs from the one recorded for the cache,
    the cache is dropped and rebuilt, so the lookup and the provider name
    reported by :func:`enrich_ip` never disagree.

    Exceptions raised by the provider's ``refresh()`` or ``build_lookup()``
    are not handled here and propagate to the caller.
    """
    global _lookup, _provider_name
    provider = get_provider()
    if provider.name != _provider_name:
        # The cached index (if any) was built by a different provider;
        # serving it under the new provider's name would mislabel results.
        _lookup = None
    if force_refresh or _files_stale(provider):
        provider.refresh()
        # Data on disk changed — discard the old index; it is rebuilt below.
        _lookup = None
    if _lookup is None:
        _lookup = provider.build_lookup()
    # Record the name only once a lookup from this provider is in place.
    _provider_name = provider.name
    return _lookup
def enrich_ip(ip: str) -> Tuple[Optional[str], Optional[str]]:
    """Return ``(country_code, provider_name)`` or ``(None, None)``.

    Never raises — any lookup failure collapses to ``(None, None)`` so the
    caller (profiler) can upsert the attacker row regardless. Failures are
    logged at debug level with the traceback so a persistently broken
    index or refresh can be told apart from "no match".

    ``DECNET_GEOIP_ENABLED=false`` (case-insensitive, surrounding
    whitespace ignored) short-circuits the whole path, useful for tests /
    agent hosts / ops wanting to disable enrichment without touching
    provider config.

    When the lookup has no country for ``ip`` the result is
    ``(None, None)``; otherwise the provider name falls back to
    ``"unknown"`` if none has been recorded.
    """
    enabled = os.environ.get("DECNET_GEOIP_ENABLED", "true")
    if enabled.strip().lower() == "false":
        return (None, None)
    try:
        cc = get_lookup().country(ip)
    except Exception:
        # Best-effort by contract: swallow, but leave a trace for operators.
        logging.getLogger(__name__).debug(
            "geoip lookup failed for %r", ip, exc_info=True
        )
        return (None, None)
    if cc is None:
        return (None, None)
    return (cc, _provider_name or "unknown")
def _files_stale(provider) -> bool:
    """Return True when the provider has no fresh data on disk.

    "Fresh" = at least one data file exists whose mtime is within the
    refresh window (``REFRESH_INTERVAL_S`` seconds). We don't demand every
    RIR file be present: a partial cache still produces correct answers
    for the ranges it covers, and demanding all-or-nothing would trigger a
    network refresh every time one RIR endpoint was transiently
    unreachable.

    ``provider`` must expose ``data_paths()`` returning path objects with a
    ``stat()`` method. An empty path list counts as stale. A file that is
    missing, unreadable, or removed while being checked is skipped rather
    than raising.
    """
    paths = provider.data_paths()
    if not paths:
        return True
    now = time.time()
    for path in paths:
        try:
            # Single stat() instead of exists()+stat(): no window in which
            # the file can vanish between the check and the read.
            mtime = path.stat().st_mtime
        except (OSError, ValueError):
            continue
        if now - mtime <= REFRESH_INTERVAL_S:
            return False
    return True
# Public names of this package; GEOIP_ROOT is re-exported from decnet.geoip.paths.
__all__ = ["get_lookup", "enrich_ip", "GEOIP_ROOT", "REFRESH_INTERVAL_S"]