Adds decnet.fleet.reconciler — a pure async function plus a long-lived
worker — that periodically reconciles the three sources of truth on a
DECNET host:
1. decnet-state.json (CLI-canonical fleet record)
2. fleet_deckies table (DB mirror, written by engine.deployer)
3. docker inspect (actual per-container runtime state)
Drift handling:
* JSON has X, DB doesn't → INSERT (deploy ran with DB offline)
* DB has X (this host), JSON doesn't → DELETE (teardown ran with DB offline)
* Both have X, docker disagrees → flip state to running/failed/degraded
* Docker socket unreachable → leave existing state alone (do not
mark every row as torn_down)
Cross-host safety: deletions are scoped to host_uuid for the local host;
a master that runs both a local fleet and swarm workers will never
clobber a peer's slice.
CLI:
decnet reconcile --once # one-shot, prints counts
decnet reconcile [--interval N] # long-lived worker, mirrors
# orchestrator's lifecycle (control
# listener + heartbeat + tick loop)
Promotes decnet/fleet.py → decnet/fleet/ package so the reconciler can
live alongside it without name collision (build_deckies_from_ini and
all_service_names re-exported unchanged via __init__.py).
14 new tests cover state aggregation rules, all four drift directions,
host_uuid scoping, docker-unreachable safety, and worker shutdown via
the bus control event.
63 lines
2.0 KiB
Python
63 lines
2.0 KiB
Python
from __future__ import annotations
|
|
|
|
import typer
|
|
|
|
from . import utils as _utils
|
|
from .utils import console, log
|
|
|
|
|
|
def register(app: typer.Typer) -> None:
    """Attach the ``reconcile`` command to *app*.

    The command is defined as a closure and registered through
    ``app.command(name="reconcile")``. Nothing is returned.

    Args:
        app: The Typer application the command is added to.
    """

    @app.command(name="reconcile")
    def reconcile_cmd(
        once: bool = typer.Option(
            False, "--once",
            help="Run a single reconcile pass and exit (no daemon loop).",
        ),
        interval: int = typer.Option(
            30, "--interval", "-i",
            # Reject zero/negative values at the CLI boundary: the value is
            # handed unchanged to the worker, where a non-positive delay
            # would presumably degrade into a busy loop (worker code is not
            # visible from here — confirm).
            min=1,
            help="Seconds between reconcile passes (ignored with --once).",
        ),
        daemon: bool = typer.Option(
            False, "--daemon", "-d",
            help="Detach to background as a daemon process (long-lived only).",
        ),
    ) -> None:
        """Converge fleet state across decnet-state.json, the DB, and docker."""
        # NOTE: the docstring above is left as a single line on purpose —
        # Typer uses a command's docstring as its help text.

        # Imports are kept function-local, as in the original; presumably to
        # avoid loading these modules for unrelated CLI commands — confirm.
        import asyncio

        from decnet.web.dependencies import repo

        if once:
            # One-shot mode: a single pass, print the counters, exit.
            # ``--daemon`` and ``--interval`` are never consulted on this path.
            from decnet.fleet.reconciler import reconcile_once

            async def _one() -> None:
                await repo.initialize()
                counts = await reconcile_once(repo)
                console.print(
                    f"[bold cyan]reconcile:[/] "
                    f"inserted={counts['inserted']} "
                    f"deleted={counts['deleted']} "
                    f"state_updated={counts['state_updated']}"
                )

            asyncio.run(_one())
            return

        # Long-lived mode from here on.
        from decnet.fleet.reconciler_worker import fleet_reconciler_worker

        if daemon:
            # Log before detaching so the message is emitted by the
            # invoking (foreground) process.
            log.info("reconciler daemonizing interval=%d", interval)
            _utils._daemonize()

        log.info("reconciler starting interval=%d", interval)
        console.print(
            f"[bold cyan]Fleet reconciler starting[/] (interval: {interval}s)"
        )

        async def _run() -> None:
            await repo.initialize()
            await fleet_reconciler_worker(repo, interval=interval)

        try:
            asyncio.run(_run())
        except KeyboardInterrupt:
            # Ctrl-C is the expected way to stop a foreground worker; print a
            # short notice instead of a traceback.
            console.print("\n[yellow]Reconciler stopped.[/]")