feat(engine,api): add orphan topology resource reaper
Topology rows deleted without a proper teardown leave Docker containers and bridge networks behind, holding IPAM pools that cause 403 "Pool overlaps" on the next deploy at the same subnet.

- engine/reaper.py walks the local Docker daemon, extracts the 8-char topology prefix from every decnet_t_* resource, and force-removes containers + networks whose prefix is not in the repo.
- POST /api/v1/topologies/reap-orphans (admin-only) returns a report of live/orphan prefixes and what was removed.
- Resources belonging to live topologies are never touched; per-resource errors are captured without aborting the sweep.
This commit is contained in:
@@ -21,6 +21,7 @@ from .api_get_topology import router as _get_router
|
||||
from .api_lan_crud import router as _lan_router
|
||||
from .api_list_topologies import router as _list_router
|
||||
from .api_mutations import router as _mutations_router
|
||||
from .api_reap_orphans import router as _reap_router
|
||||
from .api_teardown_topology import router as _teardown_router
|
||||
|
||||
topology_router = APIRouter(prefix="/topologies", tags=["topologies"])
|
||||
@@ -34,6 +35,7 @@ topology_router.include_router(_catalog_router)
|
||||
topology_router.include_router(_list_router)
|
||||
topology_router.include_router(_create_blank_router)
|
||||
topology_router.include_router(_create_router)
|
||||
topology_router.include_router(_reap_router)
|
||||
topology_router.include_router(_deploy_router)
|
||||
topology_router.include_router(_teardown_router)
|
||||
topology_router.include_router(_delete_router)
|
||||
|
||||
46
decnet/web/router/topology/api_reap_orphans.py
Normal file
46
decnet/web/router/topology/api_reap_orphans.py
Normal file
@@ -0,0 +1,46 @@
|
||||
"""POST /topologies/reap-orphans — remove Docker resources for topology
|
||||
ids the DB no longer knows about.
|
||||
|
||||
A topology row deleted outside the teardown flow (operator error,
|
||||
crashed master, direct DB edit) leaves its containers and bridge
|
||||
networks behind. The orphan networks keep their IPAM pools, so the
|
||||
next deploy at the same subnet hits a 403 ``Pool overlaps`` from the
|
||||
Docker daemon.
|
||||
|
||||
This endpoint walks the local Docker daemon, computes the set of
|
||||
topology prefixes still known to the repo, and force-removes every
|
||||
container + network whose prefix is orphaned. Resources belonging to
|
||||
live topologies are never touched.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, Depends
|
||||
|
||||
from decnet.engine.reaper import reap_orphan_topology_resources
|
||||
from decnet.telemetry import traced as _traced
|
||||
from decnet.web.dependencies import repo, require_admin
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.post(
    "/reap-orphans",
    tags=["MazeNET Topologies"],
    responses={
        401: {"description": "Missing or invalid credentials"},
        403: {"description": "Insufficient permissions"},
    },
)
@_traced("api.topology.reap_orphans")
async def api_reap_orphans(
    _admin: dict = Depends(require_admin),
) -> dict:
    """Sweep orphaned topology resources and return the reaper's report.

    Access is gated by the ``require_admin`` dependency; the resolved
    value is not used by the handler itself.

    The sweep is delegated entirely to
    ``reap_orphan_topology_resources``, which is handed the shared
    ``repo``. Per the module documentation, the resulting report covers
    the live prefixes, the orphan prefixes, the containers and networks
    that were removed, and any per-resource errors (which are recorded
    rather than aborting the sweep).

    Returns:
        The report serialised through its ``to_dict()`` method.
    """
    # The reaper owns all Docker/DB interaction; this handler only
    # awaits it and converts the report into a JSON-friendly mapping.
    return (await reap_orphan_topology_resources(repo)).to_dict()
|
||||
Reference in New Issue
Block a user