fix(topology/allocator): widen default subnet base to /12 for mass-scale

A 30-LAN generate request already fits in 172.20.0.0/16, but trees
with depth/branching that multiply past 256 (e.g. depth=6,
branching=4 ≈ 5k LANs) hit AllocatorExhausted before the first
write.

SubnetAllocator now accepts a full CIDR base ("172.16.0.0/12" →
4096 /24s) in addition to the legacy two-octet shorthand ("172.20",
auto-lifted to /16). The parent's prefix length must be ≤ 24; a /24
base yields exactly one slot. Iteration order is preserved for /16
bases so existing topologies keep their third-octet sweep; a /12
base additionally advances the second octet each time a
third-octet sweep runs out.

Defaults bumped to 172.16.0.0/12: TopologyConfig.subnet_base_prefix,
/next-subnet query param, and the mutator's add-LAN fallback. The
field pattern widens to accept CIDR. create-blank and manual LAN
CRUD still use "10.0" (lifts to /16) — one DMZ LAN per topology,
256 is plenty.
This commit is contained in:
2026-04-24 18:57:55 -04:00
parent 207f791684
commit f3408d5e62
5 changed files with 100 additions and 15 deletions

View File

@@ -118,7 +118,7 @@ async def apply_add_lan(
if subnet is None: if subnet is None:
reserved = await reserved_subnets(repo) reserved = await reserved_subnets(repo)
alloc = SubnetAllocator(base_prefix="172.20", reserved=reserved) alloc = SubnetAllocator(base_prefix="172.16.0.0/12", reserved=reserved)
subnet = alloc.next_free() subnet = alloc.next_free()
await repo.add_lan( await repo.add_lan(

View File

@@ -69,31 +69,47 @@ class IPAllocator:
class SubnetAllocator: class SubnetAllocator:
"""Hands out ``/24`` subnets under a base prefix (e.g. ``172.20``).""" """Hands out ``/24`` subnets inside a parent network.
_MAX_INDEX = 256 # 172.20.0/24 .. 172.20.255/24 Accepted ``base_prefix`` forms:
* Full CIDR: ``"172.16.0.0/12"`` → 4096 ``/24`` slots
* Legacy two-octet shorthand: ``"172.20"`` → auto-lifted to
``"172.20.0.0/16"`` (256 slots), for backward compat with
configs written before mass-scale topologies were a thing.
The parent must be a ``/24`` or wider (i.e. its prefix length
must be ≤ 24); a ``/24`` base yields exactly one slot, anything
wider yields more.
"""
def __init__( def __init__(
self, self,
base_prefix: str, base_prefix: str,
reserved: Iterable[str] = (), reserved: Iterable[str] = (),
) -> None: ) -> None:
self._base = base_prefix.rstrip(".") parent = _parse_base(base_prefix)
if parent.prefixlen > 24:
raise ValueError(
f"subnet base {parent.with_prefixlen} is narrower than /24; "
"cannot carve /24 children out of it"
)
self._parent = parent
# A generator over all /24 subnets of the parent. ipaddress
# yields them in order, so the allocator preserves the legacy
# "sequential-third-octet" behaviour for /16 bases. For /12
# bases you get second.third-octet sweep.
self._iter = parent.subnets(new_prefix=24) if parent.prefixlen < 24 else iter([parent])
self._reserved: set[str] = {s for s in reserved} self._reserved: set[str] = {s for s in reserved}
self._cursor = 0
def _candidate(self, idx: int) -> str:
return f"{self._base}.{idx}.0/24"
def next_free(self) -> str: def next_free(self) -> str:
while self._cursor < self._MAX_INDEX: for net in self._iter:
subnet = self._candidate(self._cursor) subnet = net.with_prefixlen
self._cursor += 1
if subnet not in self._reserved: if subnet not in self._reserved:
self._reserved.add(subnet) self._reserved.add(subnet)
return subnet return subnet
raise AllocatorExhausted( raise AllocatorExhausted(
f"no free /24s left under {self._base}.0.0/16" f"no free /24s left under {self._parent.with_prefixlen}"
) )
def reserve(self, subnet: str) -> None: def reserve(self, subnet: str) -> None:
@@ -103,6 +119,21 @@ class SubnetAllocator:
return subnet not in self._reserved return subnet not in self._reserved
def _parse_base(base_prefix: str) -> IPv4Network:
"""Accept either ``'a.b.c.d/n'`` or legacy ``'a.b'`` shorthand."""
stripped = base_prefix.strip().rstrip(".")
if "/" in stripped:
return IPv4Network(stripped, strict=False)
octets = stripped.split(".")
if len(octets) == 2:
return IPv4Network(f"{stripped}.0.0/16", strict=False)
if len(octets) == 4:
return IPv4Network(f"{stripped}/24", strict=False)
raise ValueError(
f"unrecognised subnet base {base_prefix!r}; expected 'x.y' or CIDR"
)
# Topology statuses whose LANs still claim subnets. torn_down is the # Topology statuses whose LANs still claim subnets. torn_down is the
# only state that releases its networks back to the pool. # only state that releases its networks back to the pool.
_SUBNET_CLAIMING_STATES: frozenset[str] = frozenset( _SUBNET_CLAIMING_STATES: frozenset[str] = frozenset(

View File

@@ -30,8 +30,16 @@ class TopologyConfig(BaseModel):
# from its LAN to a non-parent, non-child LAN. 0.0 yields a tree. # from its LAN to a non-parent, non-child LAN. 0.0 yields a tree.
cross_edge_probability: float = Field(default=0.0, ge=0.0, le=1.0) cross_edge_probability: float = Field(default=0.0, ge=0.0, le=1.0)
# IP allocation base. LANs get sequential /24s starting here. # IP allocation base. LANs get sequential /24s carved out of this
subnet_base_prefix: str = Field(default="172.20", pattern=r"^\d{1,3}\.\d{1,3}$") # network. Accepts either a full CIDR (e.g. ``172.16.0.0/12`` for
# 4096 slots) or the legacy two-octet shorthand ``172.20`` which
# auto-lifts to ``172.20.0.0/16`` (256 slots). Default is a /12
# so mass-scale topologies (depth/branching trees with >256 LANs)
# don't exhaust the pool on first generation.
subnet_base_prefix: str = Field(
default="172.16.0.0/12",
pattern=r"^\d{1,3}\.\d{1,3}(\.\d{1,3}\.\d{1,3}/\d{1,2})?$",
)
# Service selection — reuses decnet.fleet.build_deckies' randomizer. # Service selection — reuses decnet.fleet.build_deckies' randomizer.
randomize_services: bool = Field(default=True) randomize_services: bool = Field(default=True)

View File

@@ -85,7 +85,10 @@ async def api_list_archetypes(
) )
@_traced("api.topology.catalog.next_subnet") @_traced("api.topology.catalog.next_subnet")
async def api_next_subnet( async def api_next_subnet(
base: str = Query(default="172.20", pattern=r"^\d{1,3}\.\d{1,3}$"), base: str = Query(
default="172.16.0.0/12",
pattern=r"^\d{1,3}\.\d{1,3}(\.\d{1,3}\.\d{1,3}/\d{1,2})?$",
),
_viewer: dict = Depends(require_viewer), _viewer: dict = Depends(require_viewer),
) -> NextSubnetResponse: ) -> NextSubnetResponse:
reserved = await reserved_subnets(repo) reserved = await reserved_subnets(repo)

View File

@@ -86,6 +86,49 @@ def test_subnet_allocator_exhaustion_raises():
s.next_free() s.next_free()
def test_subnet_allocator_accepts_cidr_base():
    """A /16 written as full CIDR hands out third-octet /24s in order."""
    allocator = SubnetAllocator("172.20.0.0/16")
    # List elements are evaluated left to right, so this is two
    # consecutive allocations in call order.
    first_two = [allocator.next_free(), allocator.next_free()]
    assert first_two == ["172.20.0.0/24", "172.20.1.0/24"]
def test_subnet_allocator_slash12_yields_more_than_256_slots():
    """The whole point of widening: a /12 base must outlast a single /16."""
    s = SubnetAllocator("172.16.0.0/12")
    # Burn the first 256 /24s. With a /16 base this is exhaustion; with
    # /12 we should roll into 172.17.x.x without raising.
    burned = [s.next_free() for _ in range(256)]
    # Pin both ends of the first sweep so an out-of-order iteration
    # cannot slip through a loose prefix/suffix check.
    assert burned[0] == "172.16.0.0/24"
    assert burned[-1] == "172.16.255.0/24"
    # The 257th allocation is the first /24 of the next second octet.
    assert s.next_free() == "172.17.0.0/24"
def test_subnet_allocator_slash12_total_capacity_is_4096():
    """A /12 parent holds exactly 2**(24-12) == 4096 distinct /24 slots."""
    s = SubnetAllocator("172.16.0.0/12")
    # Draw a bounded number of slots instead of looping until the
    # allocator raises: if exhaustion ever regresses, this fails fast
    # rather than spinning forever.
    handed_out = {s.next_free() for _ in range(4096)}
    # The set collapses duplicates, so this also checks uniqueness.
    assert len(handed_out) == 4096
    # Slot 4097 does not exist.
    with pytest.raises(AllocatorExhausted):
        s.next_free()
def test_subnet_allocator_rejects_narrower_than_slash24():
    """Constructing the allocator on a /25 parent raises ``ValueError``."""
    too_narrow = "192.168.1.0/25"
    with pytest.raises(ValueError, match="narrower than /24"):
        SubnetAllocator(too_narrow)
def test_subnet_allocator_exhausted_message_uses_parent_cidr():
    """The exhaustion error names the parent network in CIDR form."""
    s = SubnetAllocator("172.20.0.0/24")  # exactly one slot
    s.next_free()
    with pytest.raises(AllocatorExhausted) as excinfo:
        s.next_free()
    # Literal substring check: ``match=`` is a regex search, where the
    # dots in the CIDR would match any character.
    assert "172.20.0.0/24" in str(excinfo.value)
# --------------------------------------------------------------------- reserved_subnets # --------------------------------------------------------------------- reserved_subnets