feat(mazenet): step 7 — topology_mutations queue + mutator reconciler
Adds the live-mutation pipeline for active/degraded topologies: * TopologyMutation table with composite index (state, topology_id) so the watch-loop guard query stays O(log n). * claim_next_mutation is a single atomic UPDATE ... WHERE state='pending' so racing reconcilers deterministically pick one winner; losers see rowcount=0 and skip. * reconcile_topologies drains pending rows per live topology, applies via decnet.mutator.ops.dispatch, and on failure marks the mutation failed + transitions topology to degraded. * run_watch_loop gains a gated branch: flat-fleet mutate_all runs every tick unchanged; the reconciler only enters when the cheap has_pending_topology_mutation guard returns True. * apply_* ops re-check hard invariants (names, IP collisions, subnet overlap, known services, service_config shape) after every mutation so the repo never lands in an invalid state. * CLI: 'decnet topology mutate' / 'mutations' subcommands.
This commit is contained in:
@@ -202,6 +202,86 @@ def _teardown(
|
||||
_console.print(f"[green]Topology {topology_id} torn down.[/]")
|
||||
|
||||
|
||||
@_group.command("mutate")
|
||||
def _mutate(
|
||||
topology_id: str = typer.Argument(..., help="Topology id (active or degraded)"),
|
||||
op: str = typer.Argument(
|
||||
...,
|
||||
help=(
|
||||
"One of: add_lan, remove_lan, attach_decky, detach_decky, "
|
||||
"remove_decky, update_decky, update_lan"
|
||||
),
|
||||
),
|
||||
payload_json: str = typer.Option(
|
||||
"{}",
|
||||
"--payload-json",
|
||||
help="JSON payload for the op (see mutator.ops for keys)",
|
||||
),
|
||||
expected_version: Optional[int] = typer.Option(
|
||||
None,
|
||||
"--expected-version",
|
||||
help="Optimistic-concurrency guard; enqueue fails with a "
|
||||
"VersionConflict if the topology has since been mutated.",
|
||||
),
|
||||
) -> None:
|
||||
"""Enqueue a live mutation. The mutator's watch loop applies it."""
|
||||
_require_master_mode("topology mutate")
|
||||
import json
|
||||
|
||||
try:
|
||||
payload = json.loads(payload_json)
|
||||
except ValueError as e:
|
||||
_console.print(f"[red]Invalid JSON: {e}[/]")
|
||||
raise typer.Exit(1)
|
||||
|
||||
async def _go() -> str:
|
||||
repo = await _repo()
|
||||
return await repo.enqueue_topology_mutation(
|
||||
topology_id, op, payload, expected_version=expected_version,
|
||||
)
|
||||
|
||||
mid = asyncio.run(_go())
|
||||
_console.print(
|
||||
f"[green]Mutation enqueued[/] — id=[bold]{mid}[/] op={op} "
|
||||
f"(watch for state=applied on [cyan]topology mutations {topology_id}[/])"
|
||||
)
|
||||
|
||||
|
||||
@_group.command("mutations")
|
||||
def _mutations(
|
||||
topology_id: str = typer.Argument(..., help="Topology id"),
|
||||
state: Optional[str] = typer.Option(
|
||||
None,
|
||||
"--state",
|
||||
help="Filter to one of pending|applying|applied|failed",
|
||||
),
|
||||
) -> None:
|
||||
"""List queued/applied mutations for a topology."""
|
||||
_require_master_mode("topology mutations")
|
||||
|
||||
async def _go() -> list[dict]:
|
||||
repo = await _repo()
|
||||
return await repo.list_topology_mutations(topology_id, state=state)
|
||||
|
||||
rows = asyncio.run(_go())
|
||||
if not rows:
|
||||
_console.print("[yellow]No mutations.[/]")
|
||||
return
|
||||
table = Table(title=f"Mutations — topology {topology_id}")
|
||||
for col in ("id", "op", "state", "requested_at", "applied_at", "reason"):
|
||||
table.add_column(col)
|
||||
for r in rows:
|
||||
table.add_row(
|
||||
str(r["id"]),
|
||||
str(r["op"]),
|
||||
str(r["state"]),
|
||||
str(r.get("requested_at", "")),
|
||||
str(r.get("applied_at") or ""),
|
||||
str(r.get("reason") or ""),
|
||||
)
|
||||
_console.print(table)
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
|
||||
app.add_typer(_group, name="topology")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user