From 2c35d60d452acbc4cdd4fe38d4dbbc9ec744704f Mon Sep 17 00:00:00 2001 From: anti Date: Mon, 20 Apr 2026 23:07:31 -0400 Subject: [PATCH] feat(mazenet): host port-collision warning at deploy time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add check_no_host_port_collision: enumerate the ports the topology's gateways will publish (forwards_l3=True × svc.ports), probe live listeners via psutil, emit a 'warning'-severity PORT_COLLISION issue per overlap. Live-only — invoked from deploy_topology just after dry-run branching, so unit tests that exercise validate() stay hermetic. Warning rather than error because docker-compose up will hard-fail on a real collision anyway; this just gives operators a cleaner log line ahead of the compose failure. --- decnet/engine/deployer.py | 13 +++++++++- decnet/topology/validate.py | 50 +++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/decnet/engine/deployer.py b/decnet/engine/deployer.py index ef8b1796..c9c6bbab 100644 --- a/decnet/engine/deployer.py +++ b/decnet/engine/deployer.py @@ -35,7 +35,12 @@ from decnet.topology.compose import ( ) from decnet.topology.persistence import hydrate, transition_status from decnet.topology.status import TopologyStatus -from decnet.topology.validate import ValidationError, errors as _validation_errors, validate as _validate_topology +from decnet.topology.validate import ( + ValidationError, + check_no_host_port_collision, + errors as _validation_errors, + validate as _validate_topology, +) log = get_logger("engine") console = Console() @@ -338,6 +343,12 @@ async def deploy_topology(repo, topology_id: str, *, dry_run: bool = False) -> N log.info("topology %s dry-run complete", topology_id) return + # Host-state precheck: PORT_COLLISION is a warning (docker-compose + # will hard-fail if the port is actually unavailable; we just want + # the clearer log line up-front). Only runs at live deploy. 
def check_no_host_port_collision(h: dict[str, Any]) -> list[ValidationIssue]:
    """Flag gateway service ports that are already bound on the host.

    Only gateway deckies (``forwards_l3=True`` in decky_config) publish
    ports (see decnet/topology/compose.py). Best-effort: if ``psutil``
    isn't importable or probing fails, returns no issues.

    Args:
        h: Hydrated topology dict. Must contain a ``"deckies"`` list whose
            entries are dicts with a ``"name"`` and, optionally,
            ``"decky_config"`` and ``"services"`` (either may be missing
            or ``None``).

    Returns:
        One ``"warning"``-severity ``PORT_COLLISION`` issue per wanted port
        that currently has a listening socket on the host; an empty list
        when no gateway publishes ports, nothing collides, or the host
        could not be probed.
    """
    # host_port -> name of the first gateway decky that wants it.
    wanted: dict[int, str] = {}
    for decky in h["deckies"]:
        cfg = decky.get("decky_config") or {}
        if not cfg.get("forwards_l3"):
            continue
        # ``or []`` tolerates an explicit ``"services": None`` the same way
        # ``decky_config`` and ``ports`` are tolerated in this function.
        for svc_name in decky.get("services") or []:
            svc = get_service(svc_name)
            # Unknown services and those flagged ``fleet_singleton`` are
            # skipped. NOTE(review): presumably singletons do not publish
            # per-gateway host ports - confirm against compose.py.
            if svc is None or getattr(svc, "fleet_singleton", False):
                continue
            for port in getattr(svc, "ports", []) or []:
                wanted.setdefault(int(port), decky["name"])
    if not wanted:
        return []

    try:
        import psutil  # type: ignore

        bound = {
            conn.laddr.port
            for conn in psutil.net_connections(kind="inet")
            if conn.status == psutil.CONN_LISTEN and conn.laddr
        }
    except Exception:
        # Deliberate best-effort: a missing psutil or a failed probe must
        # never block a deploy, so report nothing instead of raising.
        return []

    return [
        ValidationIssue(
            "warning",
            "PORT_COLLISION",
            f"host port {port} is already bound; "
            f"gateway {decky_name!r} may fail to publish it",
            target={"decky": decky_name, "port": port},
        )
        for port, decky_name in wanted.items()
        if port in bound
    ]