deploy --mode swarm was failing on every heterogeneous fleet: the master populates config.interface from its own box (detect_interface() → its default NIC), then ships that verbatim. The worker's deployer then calls get_host_ip(config.interface), hits 'ip addr show wlp6s0' on a VM whose NIC is enp0s3, and 500s. Fix: agent.executor._relocalize() runs on every swarm-mode deploy. Re-detects the worker's interface/subnet/gateway/host_ip locally and swaps them into the config before calling deployer.deploy(). When the worker's subnet doesn't match the master's, decky IPs are re-allocated from the worker's subnet via allocate_ips() so they're reachable. Unihost-mode configs are left untouched — they're already built against the local box and second-guessing them would be wrong. Validated against anti@192.168.1.13: master dispatched interface=wlp6s0, agent logged 'relocalized interface=enp0s3', deployer ran successfully, dry-run returned ok=deployed. 4 new tests cover both branches (matching-subnet preserves decky IPs; mismatch re-allocates), the end-to-end executor.deploy() path, and the unihost short-circuit.
101 lines
3.6 KiB
Python
101 lines
3.6 KiB
Python
"""Thin adapter between the agent's HTTP endpoints and the existing
|
|
``decnet.engine.deployer`` code path.
|
|
|
|
Kept deliberately small: the agent does not re-implement deployment logic,
|
|
it only translates a master RPC into the same function calls the unihost
|
|
CLI already uses. Everything runs in a worker thread (the deployer is
|
|
blocking) so the FastAPI event loop stays responsive.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
from ipaddress import IPv4Network
|
|
from typing import Any
|
|
|
|
from decnet.engine import deployer as _deployer
|
|
from decnet.config import DecnetConfig, load_state, clear_state
|
|
from decnet.logging import get_logger
|
|
from decnet.network import (
|
|
allocate_ips,
|
|
detect_interface,
|
|
detect_subnet,
|
|
get_host_ip,
|
|
)
|
|
|
|
log = get_logger("agent.executor")
|
|
|
|
|
|
def _relocalize(config: DecnetConfig) -> DecnetConfig:
    """Rewrite a master-built config to the worker's local network reality.

    The master populates ``interface``/``subnet``/``gateway`` from its own
    box before dispatching, which blows up the deployer on any worker whose
    NIC name differs (common in heterogeneous fleets — master on ``wlp6s0``,
    worker on ``enp0s3``). We always re-detect locally; if the worker sits
    on a different subnet than the master, decky IPs are re-allocated from
    the worker's subnet so they're actually reachable.

    Args:
        config: The config as dispatched by the master. This function does
            not modify it; all changes go into the returned copy.

    Returns:
        A copy of ``config`` whose ``interface``, ``subnet`` and ``gateway``
        are the locally detected values. When the master sent no subnet, or
        its subnet differs from the local one, ``deckies`` is also replaced
        by copies carrying freshly allocated IPs; otherwise the deckies are
        carried over unchanged.

    Raises:
        RuntimeError: If re-allocation yields fewer addresses than there
            are deckies. Pairing with ``zip`` would otherwise silently drop
            the deckies that did not get an address.
        ValueError: Propagated from ``IPv4Network`` if either subnet string
            cannot be parsed.
    """
    local_iface = detect_interface()
    local_subnet, local_gateway = detect_subnet(local_iface)
    local_host_ip = get_host_ip(local_iface)

    # Network identity is always replaced, whether or not the subnets match.
    updates: dict[str, Any] = {
        "interface": local_iface,
        "subnet": local_subnet,
        "gateway": local_gateway,
    }

    # strict=False tolerates subnet strings with host bits set; comparing
    # parsed networks avoids false mismatches from formatting differences.
    master_net = IPv4Network(config.subnet, strict=False) if config.subnet else None
    local_net = IPv4Network(local_subnet, strict=False)

    if master_net is None or master_net != local_net:
        log.info(
            "agent.deploy subnet mismatch master=%s local=%s — re-allocating decky IPs",
            config.subnet, local_subnet,
        )
        wanted = len(config.deckies)
        # list() so the result can be counted even if allocate_ips returns
        # a lazy iterable.
        fresh_ips = list(
            allocate_ips(
                subnet=local_subnet,
                gateway=local_gateway,
                host_ip=local_host_ip,
                count=wanted,
            )
        )
        if len(fresh_ips) < wanted:
            # zip() stops at the shorter input; fail loudly instead of
            # deploying a truncated fleet.
            raise RuntimeError(
                f"allocate_ips returned {len(fresh_ips)} address(es) for "
                f"{wanted} deckies on subnet {local_subnet}"
            )
        updates["deckies"] = [
            decky.model_copy(update={"ip": ip})
            for decky, ip in zip(config.deckies, fresh_ips)
        ]

    return config.model_copy(update=updates)
|
|
|
|
|
|
async def deploy(config: DecnetConfig, dry_run: bool = False, no_cache: bool = False) -> None:
    """Hand a deployment request to the blocking deployer on a worker thread.

    Swarm-mode configs are first rewritten by ``_relocalize`` so they
    describe this machine's network; configs in any other mode are passed
    to the deployer exactly as received. State persistence is left to the
    deployer (per the original author's note, it calls ``save_state()``
    itself once the compose file is materialised).

    Args:
        config: Deployment config received from the caller.
        dry_run: Forwarded positionally to ``_deployer.deploy``.
        no_cache: Forwarded positionally to ``_deployer.deploy``.
    """
    incoming = (config.mode, len(config.deckies), config.interface)
    log.info("agent.deploy mode=%s deckies=%d interface=%s (incoming)", *incoming)

    if config.mode == "swarm":
        localized = _relocalize(config)
        log.info(
            "agent.deploy relocalized interface=%s subnet=%s gateway=%s",
            localized.interface,
            localized.subnet,
            localized.gateway,
        )
        config = localized

    # The trailing positional False is passed through verbatim; its meaning
    # is defined by the deployer's signature, which is not visible here.
    await asyncio.to_thread(_deployer.deploy, config, dry_run, no_cache, False)
|
|
|
|
|
|
async def teardown(decky_id: str | None = None) -> None:
    """Run the deployer's teardown off the event loop.

    ``decky_id`` is forwarded unchanged to ``_deployer.teardown``. Only
    when no id was given is the persisted state cleared afterwards
    (presumably because that case tears down the whole deployment —
    confirm against the deployer).

    Args:
        decky_id: Identifier of a single decky, or ``None``.
    """
    log.info("agent.teardown decky_id=%s", decky_id)
    await asyncio.to_thread(_deployer.teardown, decky_id)

    if decky_id is not None:
        # Targeted teardown: leave the saved state in place.
        return

    await asyncio.to_thread(clear_state)
|
|
|
|
|
|
async def status() -> dict[str, Any]:
    """Summarise the persisted deployment state as a plain dict.

    Returns:
        ``{"deployed": False, "deckies": []}`` when ``load_state`` returns
        ``None``. Otherwise a dict with ``deployed`` set to ``True``, the
        saved config's ``mode``, the compose path as a string, and one
        ``model_dump()`` dict per decky.
    """
    # load_state is run in a worker thread so the event loop is not blocked.
    saved = await asyncio.to_thread(load_state)
    if saved is None:
        return {"deployed": False, "deckies": []}

    saved_config, compose_file = saved

    # Built key by key in the same order as the original literal.
    report: dict[str, Any] = {"deployed": True}
    report["mode"] = saved_config.mode
    report["compose_path"] = str(compose_file)
    report["deckies"] = [decky.model_dump() for decky in saved_config.deckies]
    return report
|