merge: testing → main (reconcile 2-week divergence)
This commit is contained in:
95
decnet/geoip/__init__.py
Normal file
95
decnet/geoip/__init__.py
Normal file
@@ -0,0 +1,95 @@
|
||||
"""
|
||||
GeoIP enrichment — maps attacker IPs to country codes for attacker intelligence.
|
||||
|
||||
Public surface:
|
||||
|
||||
* :func:`get_lookup` — returns the singleton :class:`~decnet.geoip.lookup.Lookup`.
|
||||
Builds / loads the index on first call. Refreshes the underlying data files
|
||||
if they're missing or older than :data:`REFRESH_INTERVAL_S`.
|
||||
* :func:`enrich_ip` — convenience wrapper used by the profiler: takes an IP
|
||||
string, returns ``(country_code, provider_name)`` or ``(None, None)``.
|
||||
|
||||
Provider selection goes through :func:`~decnet.geoip.factory.get_provider`
|
||||
(env ``DECNET_GEOIP_PROVIDER``, default ``rir``). Direct imports of concrete
|
||||
providers are forbidden — mirrors the ``get_bus`` / ``get_repository`` rule.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import time
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from decnet.geoip.factory import get_provider
|
||||
from decnet.geoip.lookup import Lookup
|
||||
from decnet.geoip.paths import GEOIP_ROOT
|
||||
|
||||
# 24 h — delegated-stats files are refreshed daily by the RIRs.
|
||||
REFRESH_INTERVAL_S = 86_400
|
||||
|
||||
_lookup: Optional[Lookup] = None
|
||||
_provider_name: Optional[str] = None
|
||||
|
||||
|
||||
def get_lookup(*, force_refresh: bool = False) -> Lookup:
    """Return the cached :class:`Lookup`, building it on first use.

    The active provider is resolved through ``get_provider()`` on every
    call. Its data files are refreshed first when they are stale (see
    ``_files_stale``) or when ``force_refresh=True`` is passed (used by
    ``decnet geoip refresh``); a refresh always discards the cached index
    so it is rebuilt from the new files before returning.

    The cached index is also discarded when the active provider's name
    differs from the one that built it, so results are never attributed
    to the wrong provider.

    Args:
        force_refresh: Bypass the age check and refresh unconditionally.

    Returns:
        The module-level :class:`Lookup` singleton.
    """
    global _lookup, _provider_name
    provider = get_provider()

    # A cached index belongs to the provider that produced it. If the
    # provider changed since the last call, drop the old index instead of
    # serving it under the new provider's name.
    if _provider_name is not None and _provider_name != provider.name:
        _lookup = None
    _provider_name = provider.name

    # NOTE(review): staleness is evaluated on every call, so while the
    # files stay stale (e.g. a refresh that could not download anything)
    # each call attempts another refresh — consider a retry back-off.
    if force_refresh or _files_stale(provider):
        provider.refresh()
        _lookup = None  # rebuilt from the refreshed files just below

    if _lookup is None:
        _lookup = provider.build_lookup()
    return _lookup
|
||||
|
||||
|
||||
def enrich_ip(ip: str) -> Tuple[Optional[str], Optional[str]]:
    """Map *ip* to ``(country_code, provider_name)``.

    Returns ``(None, None)`` whenever no country can be produced: the
    feature is switched off, the lookup has no answer for the address, or
    anything at all goes wrong along the way. This function never raises,
    so the caller (the profiler) can upsert the attacker row regardless.

    Setting ``DECNET_GEOIP_ENABLED=false`` (case-insensitive) skips the
    lookup entirely — handy for tests, agent hosts, or operators who want
    enrichment off without touching the provider configuration.
    """
    miss: Tuple[Optional[str], Optional[str]] = (None, None)

    switch = os.environ.get("DECNET_GEOIP_ENABLED", "true")
    if switch.lower() == "false":
        return miss

    try:
        country_code = get_lookup().country(ip)
        if country_code is not None:
            return (country_code, _provider_name or "unknown")
    except Exception:
        # Deliberate catch-all: enrichment is best-effort and must not
        # propagate failures to the caller.
        pass
    return miss
|
||||
|
||||
|
||||
def _files_stale(provider) -> bool:
|
||||
"""True when the provider has no fresh data on disk.
|
||||
|
||||
"Fresh" = at least one data file exists whose mtime is within the
|
||||
refresh window. We don't demand every RIR file be present: a
|
||||
partial cache still produces correct answers for the ranges it
|
||||
covers, and demanding all-or-nothing would trigger a network
|
||||
refresh every time one RIR endpoint was transiently unreachable.
|
||||
"""
|
||||
paths = provider.data_paths()
|
||||
if not paths:
|
||||
return True
|
||||
now = time.time()
|
||||
for p in paths:
|
||||
if p.exists() and now - p.stat().st_mtime <= REFRESH_INTERVAL_S:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
__all__ = ["get_lookup", "enrich_ip", "GEOIP_ROOT", "REFRESH_INTERVAL_S"]
|
||||
34
decnet/geoip/base.py
Normal file
34
decnet/geoip/base.py
Normal file
@@ -0,0 +1,34 @@
|
||||
"""GeoIP provider protocol.
|
||||
|
||||
Concrete providers (:mod:`decnet.geoip.rir`, future ``dbip``, ``maxmind``)
|
||||
implement this. Callers must go through
|
||||
:func:`~decnet.geoip.factory.get_provider`; never import a concrete
|
||||
provider class directly.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
from typing import Sequence
|
||||
|
||||
from decnet.geoip.lookup import Lookup
|
||||
|
||||
|
||||
class Provider(ABC):
    """Interface that every GeoIP data source implements."""

    #: Short tag written to ``Attacker.country_source`` (e.g. ``'rir'``).
    name: str

    @abstractmethod
    def data_paths(self) -> Sequence[Path]:
        """List the files this provider manages.

        Used for staleness detection; the order of the returned paths
        carries no meaning.
        """

    @abstractmethod
    def refresh(self) -> None:
        """Download or regenerate the provider's raw data files."""

    @abstractmethod
    def build_lookup(self) -> Lookup:
        """Parse the on-disk data files into a ready-to-query Lookup."""
|
||||
47
decnet/geoip/factory.py
Normal file
47
decnet/geoip/factory.py
Normal file
@@ -0,0 +1,47 @@
|
||||
"""GeoIP provider factory.
|
||||
|
||||
Dispatch key: ``DECNET_GEOIP_PROVIDER`` (default ``rir``). Lazy singleton,
|
||||
same shape as :func:`decnet.bus.factory.get_bus`.
|
||||
|
||||
MVP wires only the RIR provider. ``dbip`` and ``maxmind`` slots are
|
||||
reserved and raise :class:`NotImplementedError` until their subpackages
|
||||
land.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
from decnet.geoip.base import Provider
|
||||
|
||||
_cached: Optional[Provider] = None
|
||||
_cached_key: Optional[str] = None
|
||||
|
||||
|
||||
def get_provider() -> Provider:
    """Return the :class:`Provider` selected by ``DECNET_GEOIP_PROVIDER``.

    The instance is memoised together with the key that selected it; a
    later call with a different key builds a new instance.

    Raises:
        NotImplementedError: for the reserved ``dbip`` / ``maxmind`` keys.
        ValueError: for any other unrecognised key.
    """
    global _cached, _cached_key
    requested = os.environ.get("DECNET_GEOIP_PROVIDER", "rir").lower()

    # Fast path: same key as last time and an instance is already built.
    if _cached_key == requested and _cached is not None:
        return _cached

    if requested in {"dbip", "maxmind"}:
        raise NotImplementedError(
            f"GeoIP provider {requested!r} is not wired yet; only 'rir' ships in MVP."
        )
    if requested != "rir":
        raise ValueError(f"Unsupported GeoIP provider: {requested!r}")

    # Function-scope import: the concrete provider module is only loaded
    # once it has actually been selected.
    from decnet.geoip.rir.provider import RirProvider

    instance: Provider = RirProvider()
    _cached, _cached_key = instance, requested
    return instance
|
||||
|
||||
|
||||
def reset_cache() -> None:
    """Drop the memoised provider and the key that selected it.

    Meant for tests, which swap providers through the environment
    variable and need the next lookup to start from scratch.
    """
    global _cached, _cached_key
    _cached = _cached_key = None
|
||||
121
decnet/geoip/lookup.py
Normal file
121
decnet/geoip/lookup.py
Normal file
@@ -0,0 +1,121 @@
|
||||
"""Provider-agnostic country lookup.
|
||||
|
||||
A :class:`Lookup` is a frozen, sorted array of (start_ip, end_ip, cc)
|
||||
ranges queried via :mod:`bisect`. O(log n) on ~200k ranges.
|
||||
|
||||
Private/loopback/invalid IPv4 and all IPv6 addresses resolve to
|
||||
``None`` — honeypots hit plenty of RFC1918 traffic from our own probes,
|
||||
and IPv6 country-mapping is explicitly out of MVP scope.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import bisect
|
||||
import ipaddress
|
||||
import pickle # nosec B403 — self-produced cache under /var/lib/decnet, never deserialized from untrusted input
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Iterable, Iterator, List, Optional, Tuple
|
||||
|
||||
Range = Tuple[int, int, str]
|
||||
|
||||
|
||||
@dataclass
class Lookup:
    """Indexed country lookup over IPv4 ranges.

    Holds three parallel lists sorted by range start and answers
    "which country owns this address?" with a binary search.
    """

    # Parallel arrays for bisect: _starts[i] is the start-IP of the i-th
    # range, _ends[i] its inclusive end, _ccs[i] its country code.
    _starts: List[int]
    _ends: List[int]
    _ccs: List[str]

    @classmethod
    def from_ranges(cls, ranges: Iterable[Range]) -> "Lookup":
        """Build a Lookup from ``(start, end_inclusive, cc)`` triples.

        Ranges are sorted by ``(start, end)``. When several ranges share
        the same start only the one sorted last is kept, i.e. the one
        with the largest end; among exact ``(start, end)`` duplicates the
        one appearing last in the input wins (the sort is stable).
        Ranges with distinct starts are all kept, adjacent or not.

        NOTE: ranges that overlap with *different* starts are neither
        merged nor split; :meth:`country` only inspects the range with
        the greatest start not exceeding the queried address.
        """
        starts: List[int] = []
        ends: List[int] = []
        ccs: List[str] = []
        for start, end, cc in sorted(ranges, key=lambda r: (r[0], r[1])):
            if starts and starts[-1] == start:
                # Same start as the previous entry: overwrite it in place.
                ends[-1] = end
                ccs[-1] = cc
                continue
            starts.append(start)
            ends.append(end)
            ccs.append(cc)
        return cls(starts, ends, ccs)

    def country(self, ip: str) -> Optional[str]:
        """Return the country code stored for ``ip``, or ``None``.

        ``None`` on: IPv6, private/loopback/link-local/multicast/reserved/
        unspecified addresses, malformed strings, and IPs outside every
        known range.
        """
        try:
            addr = ipaddress.ip_address(ip)
        except ValueError:
            return None
        if isinstance(addr, ipaddress.IPv6Address):
            return None
        if (
            addr.is_private
            or addr.is_loopback
            or addr.is_link_local
            or addr.is_multicast
            or addr.is_reserved
            or addr.is_unspecified
        ):
            return None

        n = int(addr)
        # bisect_right gives the first start > n; the candidate range is
        # the one immediately before it.
        idx = bisect.bisect_right(self._starts, n) - 1
        if idx < 0:
            return None
        if n <= self._ends[idx]:
            return self._ccs[idx]
        return None

    def __len__(self) -> int:
        """Number of ranges held by the index."""
        return len(self._starts)

    # ---------- persistence ----------

    def save(self, path: Path) -> None:
        """Pickle the lookup to *path* via a temp file and atomic rename.

        The parent directory is created if needed. If writing or renaming
        fails, the temp file is removed (best effort) and the original
        exception is re-raised, so no ``*.tmp`` litter is left behind.
        """
        tmp = path.with_suffix(path.suffix + ".tmp")
        tmp.parent.mkdir(parents=True, exist_ok=True)
        payload = {
            "version": 1,
            "starts": self._starts,
            "ends": self._ends,
            "ccs": self._ccs,
        }
        try:
            with tmp.open("wb") as fh:
                pickle.dump(payload, fh, protocol=pickle.HIGHEST_PROTOCOL)
            tmp.replace(path)
        except BaseException:
            try:
                tmp.unlink()
            except OSError:
                pass  # nothing to clean up, or cleanup itself impossible
            raise

    @classmethod
    def load(cls, path: Path) -> "Lookup":
        """Load a pickled lookup from *path*.

        Raises:
            ValueError: if the payload is not a dict, carries an
                unsupported version, or its three arrays differ in
                length (a corrupt index would otherwise only fail later,
                at query time).
            KeyError: if one of the expected arrays is missing.
        """
        with path.open("rb") as fh:
            data = pickle.load(fh)  # nosec B301 — self-produced file under /var/lib/decnet
        if not isinstance(data, dict):
            raise ValueError(
                f"unexpected lookup index payload: {type(data).__name__}"
            )
        if data.get("version") != 1:
            raise ValueError(f"unsupported lookup index version: {data.get('version')!r}")
        starts, ends, ccs = data["starts"], data["ends"], data["ccs"]
        if not (len(starts) == len(ends) == len(ccs)):
            raise ValueError("corrupt lookup index: parallel arrays differ in length")
        return cls(starts, ends, ccs)
|
||||
|
||||
|
||||
def iter_ranges(items: Iterable[Range]) -> Iterator[Range]:
    """Yield every range in *items* unchanged, in order.

    A plain passthrough, kept so providers can compose iterators without
    importing private symbols.
    """
    for entry in items:
        yield entry
|
||||
19
decnet/geoip/paths.py
Normal file
19
decnet/geoip/paths.py
Normal file
@@ -0,0 +1,19 @@
|
||||
"""Filesystem layout for GeoIP data.
|
||||
|
||||
``GEOIP_ROOT`` is where providers drop their raw files and cache indexes.
|
||||
Default ``/var/lib/decnet/geoip`` — ``decnet init`` seeds the directory
|
||||
with ``decnet:decnet`` ownership, mode 0755. Override with
|
||||
``DECNET_GEOIP_ROOT`` for test harnesses.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
GEOIP_ROOT = Path(os.environ.get("DECNET_GEOIP_ROOT", "/var/lib/decnet/geoip"))
|
||||
|
||||
|
||||
def ensure_root() -> Path:
    """Make sure the ``GEOIP_ROOT`` directory exists and return it.

    Missing parent directories are created too; an already existing
    directory is left untouched.
    """
    root = GEOIP_ROOT
    root.mkdir(parents=True, exist_ok=True)
    return root
|
||||
87
decnet/geoip/ptr.py
Normal file
87
decnet/geoip/ptr.py
Normal file
@@ -0,0 +1,87 @@
|
||||
"""Reverse DNS (PTR record) lookup for attacker IPs.
|
||||
|
||||
Colocated with ``decnet.geoip`` because the shape matches: take an IP,
|
||||
return a piece of supplementary metadata, never raise. Same operator
|
||||
posture as ``enrich_ip`` — a missing PTR must never break profile
|
||||
building.
|
||||
|
||||
The profiler calls this once per attacker IP at first sighting. Never
|
||||
re-resolves — the profiler tracks already-attempted IPs in-memory
|
||||
(``_WorkerState.ptr_attempted``) so a persistent NXDOMAIN doesn't burn
|
||||
2 seconds of tick time on every cycle.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import ipaddress
|
||||
import os
|
||||
import socket
|
||||
from typing import Optional
|
||||
|
||||
from decnet.logging import get_logger
|
||||
|
||||
log = get_logger("geoip.ptr")
|
||||
|
||||
|
||||
_DEFAULT_TIMEOUT = 2.0
|
||||
|
||||
|
||||
def _is_resolvable(ip: str) -> bool:
|
||||
"""True iff ``ip`` is a parseable public address worth querying.
|
||||
|
||||
Private / loopback / link-local / multicast / reserved addresses
|
||||
have no meaningful PTR at the public resolver level, so short-
|
||||
circuit before spending a DNS round-trip on them.
|
||||
"""
|
||||
try:
|
||||
addr = ipaddress.ip_address(ip)
|
||||
except (ValueError, TypeError):
|
||||
return False
|
||||
if addr.is_loopback or addr.is_private or addr.is_link_local:
|
||||
return False
|
||||
if addr.is_multicast or addr.is_reserved or addr.is_unspecified:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _blocking_lookup(ip: str) -> Optional[str]:
    """Run a blocking reverse lookup for *ip*; intended for an executor thread.

    Returns the primary hostname reported by ``socket.gethostbyaddr``, or
    ``None`` when the resolver raises a socket/OS error or the name is empty.
    """
    try:
        answer = socket.gethostbyaddr(ip)
    except (socket.herror, socket.gaierror, OSError):
        return None
    primary_name = answer[0]
    return primary_name if primary_name else None
|
||||
|
||||
|
||||
async def resolve_ptr_record(
    ip: str,
    *,
    timeout: float = _DEFAULT_TIMEOUT,
) -> Optional[str]:
    """Resolve *ip* to its PTR / reverse-DNS hostname.

    The blocking resolver call is pushed to the event loop's default
    executor and awaited for at most *timeout* seconds.

    Returns the hostname on success and ``None`` for every failure mode
    (kill-switch set, address not worth resolving, NXDOMAIN, timeout,
    unexpected resolver error). PTR data is supplementary attacker
    metadata, so ordinary failures are logged at debug level instead of
    being raised.

    ``DECNET_PTR_ENABLED=false`` disables resolution altogether, for
    locked-down environments where egress DNS is forbidden.
    """
    kill_switch = os.environ.get("DECNET_PTR_ENABLED", "true")
    if kill_switch.lower() == "false" or not _is_resolvable(ip):
        return None

    loop = asyncio.get_running_loop()
    try:
        pending = loop.run_in_executor(None, _blocking_lookup, ip)
        hostname = await asyncio.wait_for(pending, timeout=timeout)
    except asyncio.TimeoutError:
        log.debug("ptr: timeout resolving %s after %.1fs", ip, timeout)
        return None
    except Exception as exc:  # noqa: BLE001 — supplementary metadata
        log.debug("ptr: resolver crashed for %s: %s", ip, exc)
        return None
    else:
        return hostname
|
||||
9
decnet/geoip/rir/__init__.py
Normal file
9
decnet/geoip/rir/__init__.py
Normal file
@@ -0,0 +1,9 @@
|
||||
"""RIR delegated-stats provider.
|
||||
|
||||
Free, offline, no license: each Regional Internet Registry publishes a
|
||||
daily plaintext file mapping IPv4 allocations to countries. Together the
|
||||
five RIR files cover the entire assigned IPv4 space.
|
||||
|
||||
Direct imports of :class:`RirProvider` are discouraged — go through
|
||||
:func:`decnet.geoip.factory.get_provider`.
|
||||
"""
|
||||
62
decnet/geoip/rir/fetch.py
Normal file
62
decnet/geoip/rir/fetch.py
Normal file
@@ -0,0 +1,62 @@
|
||||
"""RIR delegated-stats download.
|
||||
|
||||
Five public files, ~5 MB total. Pulled over HTTPS with a generic
|
||||
User-Agent (stealth: never identify as DECNET — a RIR log scraper could
|
||||
otherwise correlate our egress to a honeypot operator).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import shutil
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
from typing import Tuple
|
||||
|
||||
logger = logging.getLogger("decnet.geoip.rir.fetch")
|
||||
|
||||
# (registry_name, url). Extended delegated-stats include the opaque
|
||||
# registration ID we don't use, but they are what the RIRs recommend
|
||||
# consumers pull.
|
||||
RIR_SOURCES: Tuple[Tuple[str, str], ...] = (
|
||||
("arin", "https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest"),
|
||||
("ripe", "https://ftp.ripe.net/pub/stats/ripencc/delegated-ripencc-extended-latest"),
|
||||
("apnic", "https://ftp.apnic.net/stats/apnic/delegated-apnic-extended-latest"),
|
||||
("lacnic", "https://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest"),
|
||||
("afrinic", "https://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-extended-latest"),
|
||||
)
|
||||
|
||||
# Generic UA — no DECNET/honeypot token. Matches what a stock requests/
|
||||
# urllib script would send if someone forgot to set one.
|
||||
_USER_AGENT = "Mozilla/5.0 (compatible; fetch/1.0)"
|
||||
_TIMEOUT_S = 60
|
||||
|
||||
|
||||
def fetch_all(dest: Path) -> list[Path]:
    """Download every RIR file into *dest* and return the written paths.

    Each file is fetched to ``{name}.txt.tmp`` and only renamed over
    ``{name}.txt`` once the download finished and produced a non-empty
    file. Any failure for one registry is logged and skipped; the
    previous generation of that file (if any) is left untouched —
    better outdated than empty.

    Args:
        dest: Directory receiving the files; created if missing.

    Returns:
        Paths of the files that were successfully replaced in this run.
    """
    dest.mkdir(parents=True, exist_ok=True)
    written: list[Path] = []
    for name, url in RIR_SOURCES:
        target = dest / f"{name}.txt"
        tmp = target.with_suffix(".txt.tmp")
        try:
            _download(url, tmp)
            size = tmp.stat().st_size
            if size == 0:
                # A zero-byte body would otherwise overwrite a good
                # previous generation with nothing.
                raise ValueError("empty response body")
            tmp.replace(target)
        except Exception as exc:
            logger.error("geoip.rir: fetch failed for %s (%s): %s", name, url, exc)
            # Drop the partial download; keep any stale previous file.
            tmp.unlink(missing_ok=True)
            continue
        written.append(target)
        logger.info("geoip.rir: fetched %s (%d bytes)", name, size)
    return written
|
||||
|
||||
|
||||
def _download(url: str, dest: Path) -> None:
    """Stream the body of *url* into the file *dest*.

    The request carries the module's generic ``User-Agent`` and uses the
    module-level timeout. Errors from opening the URL or writing the file
    propagate to the caller.
    """
    request = urllib.request.Request(url, headers={"User-Agent": _USER_AGENT})
    # Nested context managers close the response and the output file on
    # every exit path; the URL is opened first, so a failed request never
    # creates *dest*.
    with urllib.request.urlopen(request, timeout=_TIMEOUT_S) as response:  # nosec B310 — fixed https RIR URLs
        with dest.open("wb") as sink:
            shutil.copyfileobj(response, sink)
|
||||
70
decnet/geoip/rir/parse.py
Normal file
70
decnet/geoip/rir/parse.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""Parser for RIR ``delegated-*-extended`` files.
|
||||
|
||||
Line shape (the bits we care about)::
|
||||
|
||||
ripencc|DE|ipv4|85.214.0.0|65536|20060814|allocated|<opaque-id>
|
||||
|
||||
Fields: registry, country, type (ipv4/ipv6/asn), start, count, date,
|
||||
status, id. We emit one ``(start_int, end_int_inclusive, country)``
|
||||
tuple per ``ipv4`` row whose status is ``allocated`` or ``assigned``.
|
||||
|
||||
Rows skipped:
|
||||
|
||||
* ``ipv6`` and ``asn`` types — IPv6 is out of MVP scope, ASN is a
|
||||
different table.
|
||||
* ``summary`` / ``version`` header lines (registry|*|*|*|*|summary).
|
||||
* Rows with status ``reserved`` / ``available`` — no country assigned.
|
||||
* Rows with country ``*`` or ``ZZ`` — sentinel for unassigned space.
|
||||
* Rows where count is not a positive integer
|
||||
(the RIR files are usually tidy, but defensive).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import ipaddress
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Iterator, Tuple
|
||||
|
||||
Range = Tuple[int, int, str]
|
||||
|
||||
logger = logging.getLogger("decnet.geoip.rir.parse")
|
||||
|
||||
_VALID_STATUSES = frozenset({"allocated", "assigned"})
|
||||
_SENTINEL_CCS = frozenset({"*", "ZZ", ""})
|
||||
|
||||
|
||||
def parse_file(path: Path) -> Iterator[Range]:
    """Yield ``(start_int, end_int_inclusive, cc)`` for every usable ipv4 row.

    A row is skipped when it is blank or a ``#`` comment, has fewer than
    seven ``|``-separated fields, is not of type ``ipv4``, has a status
    outside ``_VALID_STATUSES``, carries a sentinel country code
    (compared case-insensitively), has a placeholder start, a start or
    count that does not parse, a non-positive count, or a range that
    would run past the end of the IPv4 address space.

    Country codes are emitted upper-cased. The file is decoded as UTF-8
    with undecodable bytes replaced rather than raising.
    """
    ipv4_max = 0xFFFFFFFF  # highest 32-bit address, 255.255.255.255

    with path.open("r", encoding="utf-8", errors="replace") as fh:
        for lineno, raw in enumerate(fh, 1):
            line = raw.strip()
            if not line or line.startswith("#"):
                continue
            parts = line.split("|")
            if len(parts) < 7:
                continue
            _registry, cc, rtype, start, count, _date, status = parts[:7]

            if rtype != "ipv4":
                continue
            if status not in _VALID_STATUSES:
                continue
            # Normalise before the sentinel test so a lower-case "zz"
            # is filtered too instead of being emitted as "ZZ".
            cc = cc.upper()
            if cc in _SENTINEL_CCS:
                continue
            # summary header carries type=ipv4 but start=='*' and status
            # =='summary' — already filtered by _VALID_STATUSES, but
            # keep the guard for defensiveness.
            if start in ("*", ""):
                continue

            try:
                start_int = int(ipaddress.IPv4Address(start))
                n = int(count)
            except ValueError:
                # ipaddress.AddressValueError is a ValueError subclass.
                logger.debug("geoip.rir: skipping malformed line %d in %s", lineno, path.name)
                continue
            if n <= 0:
                continue

            end_int = start_int + n - 1
            if end_int > ipv4_max:
                # A count that overshoots the address space is corrupt
                # data; emitting it would create an impossible range.
                logger.debug("geoip.rir: skipping out-of-range line %d in %s", lineno, path.name)
                continue
            yield (start_int, end_int, cc)
|
||||
74
decnet/geoip/rir/provider.py
Normal file
74
decnet/geoip/rir/provider.py
Normal file
@@ -0,0 +1,74 @@
|
||||
"""RIR provider — orchestrates fetch + parse into a :class:`Lookup`."""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Sequence
|
||||
|
||||
from decnet.geoip.base import Provider
|
||||
from decnet.geoip.lookup import Lookup
|
||||
from decnet.geoip.paths import ensure_root
|
||||
from decnet.geoip.rir.fetch import RIR_SOURCES, fetch_all
|
||||
from decnet.geoip.rir.parse import parse_file
|
||||
|
||||
logger = logging.getLogger("decnet.geoip.rir.provider")
|
||||
|
||||
# Pickled lookup cache — skips re-parsing ~5 MB of RIR text on every
|
||||
# profiler restart. Rebuilt whenever any raw file is newer than the
|
||||
# cache, see ``_cache_fresh``.
|
||||
_CACHE_NAME = ".rir_index.pkl"
|
||||
|
||||
|
||||
class RirProvider(Provider):
    """GeoIP provider backed by the RIR delegated-stats files.

    Raw files and the pickled index both live under the GeoIP root
    directory returned by ``ensure_root()``.
    """

    name = "rir"

    def __init__(self) -> None:
        self._root = ensure_root()

    # ---------- Provider interface ----------

    def refresh(self) -> None:
        """Re-download the raw files, then drop the pickled index."""
        logger.info("geoip.rir: refreshing delegated-stats files into %s", self._root)
        fetch_all(self._root)
        # The index was built from the previous files; removing it makes
        # the next build_lookup() regenerate it.
        index_path = self._root / _CACHE_NAME
        if index_path.exists():
            index_path.unlink(missing_ok=True)

    def build_lookup(self) -> Lookup:
        """Return a Lookup, from the pickled index when it is still fresh.

        Falls back to parsing every raw file that exists. A failure to
        load or to save the index is logged as a warning and otherwise
        ignored.
        """
        index_path = self._root / _CACHE_NAME
        if self._cache_fresh(index_path):
            try:
                cached = Lookup.load(index_path)
                logger.debug("geoip.rir: loaded cached index (%d ranges)", len(cached))
                return cached
            except Exception as exc:
                logger.warning("geoip.rir: cache load failed, rebuilding: %s", exc)

        collected = []
        present_files = (p for p in self.data_paths() if p.exists())
        for raw_file in present_files:
            collected += parse_file(raw_file)

        built = Lookup.from_ranges(collected)
        try:
            built.save(index_path)
        except Exception as exc:
            logger.warning("geoip.rir: cache save failed: %s", exc)
        logger.info("geoip.rir: built index with %d ranges", len(built))
        return built

    def data_paths(self) -> Sequence[Path]:
        """Expected location of each registry's raw file (one per source)."""
        return [self._root / f"{registry}.txt" for registry, _ in RIR_SOURCES]

    # ---------- internals ----------

    def _cache_fresh(self, cache: Path) -> bool:
        """True when the pickle exists and no existing raw file is newer."""
        if not cache.exists():
            return False
        built_at = cache.stat().st_mtime
        return not any(
            raw.exists() and raw.stat().st_mtime > built_at
            for raw in self.data_paths()
        )
|
||||
Reference in New Issue
Block a user