fix(orchestrator): see fleet + shard deckies, not just topology rows

Switches _one_tick from list_running_topology_deckies to
list_running_deckies (the union view added in 095500a). Resolves the
permanent "no actionable deckies (running+ssh count=0)" log on hosts
running only unihost MACVLAN / IPVLAN decoys — the orchestrator now
sees fleet_deckies rows alongside MazeNET topology rows and SWARM
DeckyShard rows.

Also fixes the misleading log message: the old "running+ssh count=N"
reported the *pre-filter* total (count of all running deckies, not
the SSH-eligible subset that scheduler.pick actually evaluates). New
line breaks down running, ssh_eligible, and per-source counts so
debugging "why isn't it picking?" no longer requires reading
scheduler internals.

Regression test: orchestrator integration suite now seeds fleet_deckies
rows (not just topology_deckies) and verifies a tick picks them and
records an event with dst="local:fleet-*" — proving the original bug
on the operator's mothership is fixed.
This commit is contained in:
2026-04-26 21:16:22 -04:00
parent f775223a83
commit a8441481b5
2 changed files with 59 additions and 3 deletions

View File

@@ -106,6 +106,44 @@ async def test_one_tick_records_event_and_publishes(repo, fake_bus, monkeypatch)
assert ev.payload["kind"] == row["kind"]
@pytest.mark.asyncio
async def test_one_tick_picks_fleet_deckies(repo, fake_bus, monkeypatch):
    """Regression: a tick must act on deckies stored as fleet_deckies rows.

    The orchestrator used to be permanently blind to unihost MACVLAN /
    IPVLAN deckies because list_running_topology_deckies only scans
    topology_deckies. With the union view (list_running_deckies) the
    fleet_deckies rows seeded here have to be visible to _one_tick.
    """
    # Seed two running, SSH-serving fleet deckies on the "local" host.
    fleet_seed = (
        ("fleet-d1", "10.0.0.50"),
        ("fleet-d2", "10.0.0.51"),
    )
    for decky_name, decky_ip in fleet_seed:
        await repo.upsert_fleet_decky({
            "host_uuid": "local",
            "name": decky_name,
            "services": ["ssh"],
            "decky_ip": decky_ip,
            "state": "running",
        })

    async def fake_run(argv):
        # Stand-in for ssh_driver._run: always exit code 0 and empty stderr.
        # Only an invocation whose fourth argv element is "python3" gets an
        # SSH banner on stdout (presumably the banner-probe command — verify
        # against ssh_driver).
        stdout = "SSH-2.0-OpenSSH_9.6\r\n" if argv[3] == "python3" else ""
        return 0, stdout, ""

    monkeypatch.setattr(ssh_driver, "_run", fake_run)
    await orch_worker._one_tick(repo, ssh_driver.SSHDriver(), fake_bus)

    events = await repo.list_orchestrator_events(limit=10)
    assert len(events) == 1
    # The dst_decky_uuid is our composite "host_uuid:name" identifier
    # for fleet-source rows (see SQLModelRepository.list_running_deckies).
    assert events[0]["dst_decky_uuid"].startswith("local:fleet-")
@pytest.mark.asyncio
async def test_tick_is_noop_when_no_running_deckies(repo, fake_bus, monkeypatch):
called = False