feat(orchestrator): periodic prune of orchestrator_events
Every 100 ticks, trim per-dst_decky_uuid history down to 10000 rows (oldest first). Keeps the events table bounded on long-running fleets without paying the cost on every write.
This commit is contained in:
@@ -25,6 +25,13 @@ from decnet.web.db.repository import BaseRepository
|
|||||||
|
|
||||||
logger = get_logger("orchestrator")
|
logger = get_logger("orchestrator")
|
||||||
|
|
||||||
|
# Periodic-prune knobs. Trim per-decky history every _PRUNE_EVERY_TICKS
|
||||||
|
# to keep orchestrator_events from unbounded growth on long-running
|
||||||
|
# fleets. Cheap on the write path (zero overhead per tick); the cost
|
||||||
|
# pays in once every ~100 ticks.
|
||||||
|
_PRUNE_EVERY_TICKS = 100
|
||||||
|
_PRUNE_PER_DST_CAP = 10000
|
||||||
|
|
||||||
|
|
||||||
async def orchestrator_worker(
|
async def orchestrator_worker(
|
||||||
repo: BaseRepository,
|
repo: BaseRepository,
|
||||||
@@ -54,6 +61,7 @@ async def orchestrator_worker(
|
|||||||
control_task = asyncio.create_task(
|
control_task = asyncio.create_task(
|
||||||
run_control_listener(bus, "orchestrator", shutdown),
|
run_control_listener(bus, "orchestrator", shutdown),
|
||||||
)
|
)
|
||||||
|
tick_n = 0
|
||||||
try:
|
try:
|
||||||
while not shutdown.is_set():
|
while not shutdown.is_set():
|
||||||
try:
|
try:
|
||||||
@@ -66,6 +74,19 @@ async def orchestrator_worker(
|
|||||||
await _one_tick(repo, driver, bus)
|
await _one_tick(repo, driver, bus)
|
||||||
except Exception as exc: # noqa: BLE001
|
except Exception as exc: # noqa: BLE001
|
||||||
logger.error("orchestrator tick failed: %s", exc)
|
logger.error("orchestrator tick failed: %s", exc)
|
||||||
|
tick_n += 1
|
||||||
|
if tick_n % _PRUNE_EVERY_TICKS == 0:
|
||||||
|
try:
|
||||||
|
deleted = await repo.prune_orchestrator_events(
|
||||||
|
per_dst_cap=_PRUNE_PER_DST_CAP,
|
||||||
|
)
|
||||||
|
if deleted:
|
||||||
|
logger.info(
|
||||||
|
"orchestrator prune deleted=%d cap=%d",
|
||||||
|
deleted, _PRUNE_PER_DST_CAP,
|
||||||
|
)
|
||||||
|
except Exception as exc: # noqa: BLE001
|
||||||
|
logger.error("orchestrator prune failed: %s", exc)
|
||||||
finally:
|
finally:
|
||||||
for t in (heartbeat_task, control_task):
|
for t in (heartbeat_task, control_task):
|
||||||
t.cancel()
|
t.cancel()
|
||||||
|
|||||||
Reference in New Issue
Block a user