Replaces LICENSE (GPLv3 -> AGPLv3) and prepends `SPDX-License-Identifier: AGPL-3.0-or-later` to every source file across decnet/, decnet_web/, tests/, scripts/, and tools/. Rationale: closes the GPLv3 ASP loophole so any party operating a modified DECNET as a network service must offer their modified source. Personal copyright (Samuel Paschuan) + inbound=outbound contributions make a future unilateral relicense infeasible. - LICENSE: full AGPL-3.0 text (gnu.org/licenses/agpl-3.0.txt) - COPYRIGHT: project copyright notice - tools/add_spdx_headers.py: idempotent header injector (shebang- and PEP 263-aware) Touches 1565 source files (.py, .ts, .tsx, .js, .jsx, .css, .sh). No behavior change; comments only.
72 lines
2.5 KiB
Python
72 lines
2.5 KiB
Python
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
"""Parser for RIR ``delegated-*-extended`` files.
|
|
|
|
Line shape (the bits we care about)::
|
|
|
|
ripencc|DE|ipv4|85.214.0.0|65536|20060814|allocated|<opaque-id>
|
|
|
|
Fields: registry, country, type (ipv4/ipv6/asn), start, count, date,
|
|
status, id. We emit one ``(start_int, end_int_inclusive, country)``
|
|
tuple per ``ipv4`` row whose status is ``allocated`` or ``assigned``.
|
|
|
|
Rows skipped:
|
|
|
|
* ``ipv6`` and ``asn`` types — IPv6 is out of MVP scope, ASN is a
|
|
different table.
|
|
* ``summary`` / ``version`` header lines (registry|*|*|*|*|summary).
|
|
* Rows with status ``reserved`` / ``available`` — no country assigned.
|
|
* Rows with country ``*`` or ``ZZ`` — sentinel for unassigned space.
|
|
* Rows where count is not a valid positive integer (the RIR files are
usually tidy, but defensive).
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import ipaddress
|
|
import logging
|
|
from pathlib import Path
|
|
from typing import Iterator, Tuple
|
|
|
|
# One emitted record: (first address as int, last address as int, inclusive,
# country code).
Range = Tuple[int, int, str]

logger = logging.getLogger("decnet.geoip.rir.parse")

# Only rows with one of these statuses are emitted; ``reserved`` and
# ``available`` rows have no country assigned.
_VALID_STATUSES = frozenset(("allocated", "assigned"))

# Country-code values that mark unassigned space (or a missing field).
_SENTINEL_CCS = frozenset(("*", "ZZ", ""))
|
|
|
|
|
|
def parse_file(path: Path) -> Iterator[Range]:
    """Yield ``(start_int, end_int_inclusive, cc)`` for every usable ipv4 row.

    The file is read as UTF-8 (undecodable bytes are replaced) and parsed
    lazily, one line at a time. A row is emitted only when all of the
    following hold:

    * it has at least seven ``|``-separated fields;
    * its type is ``ipv4`` and its status is in ``_VALID_STATUSES``;
    * its country code, compared case-insensitively, is not a sentinel;
    * ``start`` is a valid IPv4 address and ``count`` a positive integer;
    * the resulting range does not run past 255.255.255.255.

    Everything else (blank lines, ``#`` comments, header/summary lines,
    malformed rows) is skipped without raising.

    Args:
        path: Location of a ``delegated-*-extended`` file.

    Yields:
        ``(start_int, end_int, cc)`` where both bounds are inclusive integer
        IPv4 addresses and ``cc`` is the upper-cased country code.

    Raises:
        OSError: If the file cannot be opened. Because this is a generator,
            the error surfaces when iteration starts, not at call time.
    """
    ipv4_max = (1 << 32) - 1  # integer value of 255.255.255.255

    with path.open("r", encoding="utf-8", errors="replace") as fh:
        for lineno, raw in enumerate(fh, 1):
            line = raw.strip()
            if not line or line.startswith("#"):
                continue

            parts = line.split("|")
            if len(parts) < 7:
                # Version header and other short lines.
                continue
            _registry, cc, rtype, start, count, _date, status = parts[:7]

            if rtype != "ipv4" or status not in _VALID_STATUSES:
                continue

            # Normalise before the sentinel test so a lowercase "zz" cannot
            # slip through and be emitted as "ZZ".
            cc = cc.upper()
            if cc in _SENTINEL_CCS:
                continue

            # Summary lines carry start == "*"; the status filter above
            # already drops them, this guard is purely defensive.
            if start in ("*", ""):
                continue

            try:
                # AddressValueError subclasses ValueError, so one clause
                # covers both the address and the count conversion.
                start_int = int(ipaddress.IPv4Address(start))
                n = int(count)
            except ValueError:
                logger.debug("geoip.rir: skipping malformed line %d in %s", lineno, path.name)
                continue

            end_int = start_int + n - 1
            if n <= 0 or end_int > ipv4_max:
                # Non-positive count, or a block that would extend beyond
                # the IPv4 address space.
                logger.debug(
                    "geoip.rir: skipping line %d in %s: count %d out of range",
                    lineno,
                    path.name,
                    n,
                )
                continue

            yield (start_int, end_int, cc)
|