Files
DECNET/decnet/asn/iptoasn/fetch.py
anti f2b3393669 chore: relicense to AGPL-3.0-or-later and add SPDX headers
Replaces LICENSE (GPLv3 -> AGPLv3) and prepends
`SPDX-License-Identifier: AGPL-3.0-or-later` to every source file
across decnet/, decnet_web/, tests/, scripts/, and tools/.

Rationale: closes the GPLv3 ASP loophole so any party operating a
modified DECNET as a network service must offer their modified
source. Personal copyright (Samuel Paschuan) + inbound=outbound
contributions make a future unilateral relicense infeasible.

- LICENSE: full AGPL-3.0 text (gnu.org/licenses/agpl-3.0.txt)
- COPYRIGHT: project copyright notice
- tools/add_spdx_headers.py: idempotent header injector
  (shebang- and PEP 263-aware)

Touches 1565 source files (.py, .ts, .tsx, .js, .jsx, .css, .sh).
No behavior change; comments only.
2026-05-22 21:04:16 -04:00

65 lines
2.3 KiB
Python

# SPDX-License-Identifier: AGPL-3.0-or-later
"""iptoasn.com bulk dump download.
One file: ``ip2asn-v4.tsv.gz``, ~5 MB compressed, refreshed daily.
Pulled over HTTPS with the same generic UA the geoip RIR fetcher uses
(stealth: never identify as DECNET — public-data scrapers correlated to
honeypot operator egress is the threat model).
"""
from __future__ import annotations
import logging
import shutil
import urllib.request
from pathlib import Path
from typing import Tuple
logger = logging.getLogger("decnet.asn.iptoasn.fetch")
# Mirror the (name, url) tuple shape of geoip.rir.fetch so test
# harnesses can swap one for the other.
IPTOASN_SOURCES: Tuple[Tuple[str, str], ...] = (
("ip2asn-v4", "https://iptoasn.com/data/ip2asn-v4.tsv.gz"),
)
# Generic UA — matches geoip.rir.fetch. iptoasn.com explicitly releases
# the data into the public domain and does NOT require an identifying UA,
# so we keep DECNET stealth instead of advertising.
_USER_AGENT = "Mozilla/5.0 (compatible; fetch/1.0)"
_TIMEOUT_S = 60
def fetch_all(dest: Path) -> list[Path]:
"""Download every iptoasn file into *dest*. Returns the written paths.
Atomic per file: download to ``{name}.tsv.gz.tmp`` then rename. A
partial failure leaves the previous generation intact.
"""
dest.mkdir(parents=True, exist_ok=True)
written: list[Path] = []
for name, url in IPTOASN_SOURCES:
target = dest / f"{name}.tsv.gz"
tmp = target.with_suffix(".gz.tmp")
try:
_download(url, tmp)
tmp.replace(target)
written.append(target)
logger.info(
"asn.iptoasn: fetched %s (%d bytes)",
name, target.stat().st_size,
)
except Exception as exc:
logger.error(
"asn.iptoasn: fetch failed for %s (%s): %s", name, url, exc
)
if tmp.exists():
tmp.unlink(missing_ok=True)
# Keep any stale previous file — better outdated than empty.
return written
def _download(url: str, dest: Path) -> None:
req = urllib.request.Request(url, headers={"User-Agent": _USER_AGENT})
with urllib.request.urlopen(req, timeout=_TIMEOUT_S) as resp, dest.open("wb") as fh: # nosec B310 — fixed https iptoasn URL
shutil.copyfileobj(resp, fh)