fix(engine): per-scope docker compose project names

Every compose invocation used -p decnet so fleet + every topology
lived in one docker compose project. --remove-orphans, run during
fleet pre-up cleanup and on every topology teardown / rollback, then
swept every container in the project not listed in the current compose
file — wiping sibling topologies and the flat fleet along with the
intended target.

Parameterize project on _compose / _compose_with_retry / _compose_ps
(default FLEET_COMPOSE_PROJECT="decnet"). Add _topology_compose_project
that returns decnet-topo-<id8>, and pass it through every topology
compose call site (master deploy_topology + rollback + post-deploy ps,
master teardown_topology, agent apply, agent teardown, all four live
service mutations on topology deckies). Fleet calls keep the default
and are unaffected.

Migration: live containers from before this fix remain in the shared
"decnet" project and need a one-time manual cleanup before they're
reachable by the new topology code paths.
This commit is contained in:
2026-05-22 18:29:33 -04:00
parent 1b90048715
commit ee10b55cfe
5 changed files with 201 additions and 25 deletions

View File

@@ -28,6 +28,7 @@ from decnet.engine.deployer import (
_compose_with_retry,
_teardown_order,
_topology_compose_path,
_topology_compose_project,
)
from decnet.logging import get_logger
from decnet.network import create_bridge_network, remove_bridge_network
@@ -118,12 +119,16 @@ def _materialise(hydrated: dict[str, Any], topology_id: str) -> None:
the base is the cheapest way to make this race impossible.
"""
compose_path = _topology_compose_path(topology_id)
compose_project = _topology_compose_project(topology_id)
client = docker.from_env()
for lan in hydrated["lans"]:
net_name = _topology_network_name(topology_id, lan["name"])
create_bridge_network(client, net_name, lan["subnet"], internal=not lan["is_dmz"])
write_topology_compose(hydrated, compose_path)
_compose_with_retry("up", "--build", "-d", "--always-recreate-deps", compose_file=compose_path)
_compose_with_retry(
"up", "--build", "-d", "--always-recreate-deps",
compose_file=compose_path, project=compose_project,
)
async def apply(
@@ -160,12 +165,16 @@ async def teardown(
# LAN membership list via the hydrated blob if available.
hydrated = row.hydrated if row and row.topology_id == topology_id else None
compose_path = _topology_compose_path(topology_id)
compose_project = _topology_compose_project(topology_id)
client = docker.from_env()
def _dismantle() -> None:
if compose_path.exists():
try:
_compose("down", "--remove-orphans", compose_file=compose_path)
_compose(
"down", "--remove-orphans",
compose_file=compose_path, project=compose_project,
)
except subprocess.CalledProcessError as exc:
log.warning(
"topology %s compose down failed (continuing): %s",