feat(mazenet): step 7 — topology_mutations queue + mutator reconciler
Adds the live-mutation pipeline for active/degraded topologies:

* TopologyMutation table with a composite index (state, topology_id) so the watch-loop guard query stays O(log n).
* claim_next_mutation is a single atomic UPDATE ... WHERE state='pending', so racing reconcilers deterministically pick one winner; losers see rowcount=0 and skip.
* reconcile_topologies drains pending rows per live topology, applies them via decnet.mutator.ops.dispatch, and on failure marks the mutation failed and transitions the topology to degraded.
* run_watch_loop gains a gated branch: flat-fleet mutate_all runs every tick unchanged; the reconciler only enters when the cheap has_pending_topology_mutation guard returns True.
* apply_* ops re-check hard invariants (names, IP collisions, subnet overlap, known services, service_config shape) after every mutation so the repo never lands in an invalid state.
* CLI: 'decnet topology mutate' / 'mutations' subcommands.
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
from datetime import datetime, timezone
|
||||
from typing import Literal, Optional, Any, List, Annotated
|
||||
from uuid import uuid4
|
||||
from sqlalchemy import Column, Text, UniqueConstraint
|
||||
from sqlalchemy import Column, Index, Text, UniqueConstraint
|
||||
from sqlalchemy.dialects.mysql import MEDIUMTEXT
|
||||
from sqlmodel import SQLModel, Field
|
||||
from pydantic import BaseModel, ConfigDict, Field as PydanticField, BeforeValidator
|
||||
@@ -309,6 +309,44 @@ class TopologyStatusEvent(SQLModel, table=True):
|
||||
)
|
||||
|
||||
|
||||
class TopologyMutation(SQLModel, table=True):
    """Queue row for one operator-requested live change to a MazeNET topology.

    A row captures a single intent (add a LAN, attach a decky, ...).  The
    mutator's reconciler atomically claims rows in the ``pending`` state
    (see ``SQLModelRepository.claim_next_mutation``), applies them against
    Docker, and records ``applied`` or ``failed`` on the row.  The composite
    ``(state, topology_id)`` index is there so the watch-loop guard query
    stays cheap even with years of mutation history.
    """

    __tablename__ = "topology_mutations"
    __table_args__ = (
        # NOTE(review): ``state`` also carries its own single-column index
        # below; since this composite index leads with ``state`` the extra
        # one may be redundant -- confirm against migrations before dropping.
        Index("ix_topology_mutations_state_topology", "state", "topology_id"),
    )

    # Primary key: a fresh UUID4 string generated per instance.
    id: str = Field(primary_key=True, default_factory=lambda: str(uuid4()))

    # Owning topology (foreign key into ``topologies.id``).
    topology_id: str = Field(index=True, foreign_key="topologies.id")

    # Operation name, one of:
    #   add_lan | remove_lan | attach_decky | detach_decky | remove_decky |
    #   update_decky | update_lan
    op: str = Field(index=True)

    # JSON-serialised arguments for ``op``; the expected keys vary per op.
    # The "{}" default is declared on the SQLAlchemy column itself.
    payload: str = Field(
        sa_column=Column("payload", _BIG_TEXT, default="{}", nullable=False)
    )

    # Lifecycle marker: pending | applying | applied | failed
    state: str = Field(index=True, default="pending")

    # Timezone-aware UTC timestamp taken when the row object is created.
    requested_at: datetime = Field(
        index=True,
        default_factory=lambda: datetime.now(timezone.utc),
    )

    # Defaults to None; presumably stamped once the mutation is applied --
    # the writer is not visible in this file section.
    applied_at: Optional[datetime] = Field(default=None)

    # Optional free-text explanation stored in a nullable Text column
    # (presumably the failure cause -- verify against the reconciler).
    reason: Optional[str] = Field(
        sa_column=Column("reason", Text, nullable=True), default=None
    )
|
||||
|
||||
|
||||
# --- API Request/Response Models (Pydantic) ---
|
||||
|
||||
class Token(BaseModel):
|
||||
|
||||
Reference in New Issue
Block a user