feat(deckies): live service add/remove without full redeploy

decnet.engine.services_live exposes add_service / remove_service for
both fleet and topology decky scopes.  The host's _compose() wrapper
already supported per-service targeting (up --no-deps -d <svc>,
stop, rm -f); what was missing was the orchestration around it:

* add: validate against decnet.services.registry (rejects unknown and
  fleet_singleton services); persist the new services list; re-render
  the per-scope compose file (so future redeploys reflect the change);
  run docker compose up -d --no-deps --build <decky>-<svc>.
* remove: stop + rm -f the service container; persist; re-render
  compose so a future up -d doesn't bring it back.

Both publish decky.<name>.service.added / .removed on the bus, with
the post-mutation services list.  Topic constants added to
decnet.bus.topics; the matching wiki entry in wiki-checkout/Service-Bus.md
ships in a separate commit on the wiki repo (wiki-checkout/ is gitignored).

Four new admin endpoints:

* POST/DELETE /api/v1/deckies/{name}/services{,/svc}
* POST/DELETE /api/v1/topologies/{id}/deckies/{name}/services{,/svc}

ServiceMutationError messages are mapped at the API boundary to 404
(decky/topology missing), 409 (idempotency violation), 422 (unknown
or fleet_singleton service).
This commit is contained in:
2026-04-28 22:51:42 -04:00
parent 0bc4b05c73
commit 6ac8cac908
9 changed files with 965 additions and 0 deletions

View File

@@ -66,6 +66,8 @@ from .deploy import (
from .decky import (
DeckyFileDeleteRequest,
DeckyFileDropRequest,
DeckyServiceAddRequest,
DeckyServicesResponse,
)
from .fleet import (
LOCAL_HOST_SENTINEL,
@@ -228,6 +230,8 @@ __all__ = [
"LOCAL_HOST_SENTINEL",
"DeckyFileDeleteRequest",
"DeckyFileDropRequest",
"DeckyServiceAddRequest",
"DeckyServicesResponse",
"FleetDecky",
# health
"ComponentHealth",

View File

@@ -44,6 +44,27 @@ class DeckyFileDropRequest(BaseModel):
return v
class DeckyServiceAddRequest(BaseModel):
    """Request body for adding one service to an already-deployed decky.

    The named service has to be registered (see
    :mod:`decnet.services.registry`) and must NOT be ``fleet_singleton``;
    singleton services run once fleet-wide rather than once per decky.
    Those checks are performed server-side by the engine layer, and a
    rejected name is reported to the client as a 422.
    """

    # Only a non-empty string is enforced at the schema level; whether
    # the name is actually a usable service is decided by the engine.
    name: str = PydanticField(
        ...,
        min_length=1,
    )
class DeckyServicesResponse(BaseModel):
    """Services list of a decky after a live add/remove call.

    Returned by the live add/remove API so the dashboard can show the
    new shape straight away, without issuing a follow-up GET.
    """

    # Name of the decky that was mutated.
    decky_name: str
    # Defaults to None; the fleet endpoints leave it unset, the topology
    # endpoints fill it with the topology id from the URL.
    topology_id: Optional[str] = None
    # Service names on the decky once the mutation has been applied.
    services: list[str]
class DeckyFileDeleteRequest(BaseModel):
"""Best-effort ``rm -f`` of an absolute path inside a decky container."""
decky_name: str = PydanticField(..., min_length=1)

View File

@@ -14,8 +14,18 @@ from __future__ import annotations
from fastapi import APIRouter
from .api_file_drop import router as file_drop_router
from .api_services import (
fleet_services_router,
topology_services_router,
)
def _assemble_deckies_router() -> APIRouter:
    """Build the umbrella router that carries every decky sub-router."""
    umbrella = APIRouter()
    # Sub-routers are attached in this fixed order: file drop, fleet
    # services, topology services.  The topology service routes live
    # under /topologies/{id}/... because that prefix is set on the
    # router itself; they still belong under this umbrella since the
    # *operation* (add/remove a service on a deployed decky) is the same
    # as for the fleet scope and only the addressing scheme differs.
    for child in (
        file_drop_router,
        fleet_services_router,
        topology_services_router,
    ):
        umbrella.include_router(child)
    return umbrella


deckies_router = _assemble_deckies_router()

__all__ = ["deckies_router"]

View File

@@ -0,0 +1,165 @@
"""POST/DELETE …/{decky}/services — live service add/remove.
Two scopes mounted here:
* fleet: ``/api/v1/deckies/{decky_name}/services``
* topology: ``/api/v1/topologies/{topology_id}/deckies/{decky_name}/services``
Both return the post-mutation services list so the dashboard can
re-render without a follow-up GET.
Auth: ``require_admin`` everywhere (matches every other write op on
deckies — see :mod:`decnet.web.router.fleet.api_mutate_decky`).
"""
from __future__ import annotations
from fastapi import APIRouter, Depends, HTTPException, Path
from decnet.engine.services_live import (
ServiceMutationError,
add_service,
remove_service,
)
from decnet.logging import get_logger
from decnet.web.db.models import (
DeckyServiceAddRequest,
DeckyServicesResponse,
)
from decnet.web.dependencies import repo, require_admin
# Module logger, registered under the "api.deckies.services" name.
log = get_logger("api.deckies.services")
# Fleet-scope routes: no prefix here, each route spells out its own
# /deckies/... path.
fleet_services_router = APIRouter(tags=["Deckies"])
# Topology-scope routes: every path declared on this router is served
# under the /topologies prefix.
topology_services_router = APIRouter(prefix="/topologies", tags=["Deckies"])
def _map_mutation_error(exc: ServiceMutationError) -> HTTPException:
    """Turn an engine-layer mutation error into the matching 4xx response.

    The status code is picked from the text of the error message:

    * contains ``not found`` (decky / topology missing)        → 404
    * contains ``already on`` or ``not on`` (idempotency
      violation)                                               → 409
    * anything else (unknown service, fleet_singleton)         → 422

    The message itself is passed through unchanged as the ``detail`` of
    the returned exception.  The exception is returned, not raised, so
    the caller decides how to chain it.
    """
    detail = str(exc)
    # Ordered rules: the first entry with a marker present in the
    # message wins, so "not found" takes precedence over the 409 markers.
    rules = (
        (("not found",), 404),
        (("already on", "not on"), 409),
    )
    status = next(
        (
            code
            for markers, code in rules
            if any(marker in detail for marker in markers)
        ),
        422,
    )
    return HTTPException(status_code=status, detail=detail)
# ---------------------------------------------------------- fleet
# Fleet scope: add one service to a deployed decky and answer with the
# post-mutation services list.  Documented with comments rather than a
# docstring because FastAPI would publish a docstring as the operation
# description.
@fleet_services_router.post(
    "/deckies/{decky_name}/services",
    response_model=DeckyServicesResponse,
    responses={
        400: {"description": "Malformed request body"},
        401: {"description": "Could not validate credentials"},
        403: {"description": "Insufficient permissions"},
        404: {"description": "Decky not found"},
        409: {"description": "Service already on decky"},
        422: {"description": "Unknown or fleet_singleton service"},
    },
)
async def api_fleet_add_service(
    req: DeckyServiceAddRequest,
    decky_name: str = Path(..., pattern=r"^[a-z0-9\-]{1,64}$"),
    admin: dict = Depends(require_admin),
) -> DeckyServicesResponse:
    # `admin` is not read in the body; declaring it makes the
    # require_admin dependency run for this route.
    try:
        updated = await add_service(
            repo,
            decky_kind="fleet",
            decky_name=decky_name,
            service_name=req.name,
        )
    except ServiceMutationError as err:
        # Engine errors become 404 / 409 / 422 via _map_mutation_error.
        raise _map_mutation_error(err) from err
    else:
        return DeckyServicesResponse(decky_name=decky_name, services=updated)
# Fleet scope: remove one service from a deployed decky and answer with
# the post-mutation services list.  Documented with comments rather than
# a docstring because FastAPI would publish a docstring as the operation
# description.
@fleet_services_router.delete(
    "/deckies/{decky_name}/services/{service_name}",
    response_model=DeckyServicesResponse,
    responses={
        401: {"description": "Could not validate credentials"},
        403: {"description": "Insufficient permissions"},
        404: {"description": "Decky not found"},
        409: {"description": "Service not on decky"},
    },
)
async def api_fleet_remove_service(
    decky_name: str = Path(..., pattern=r"^[a-z0-9\-]{1,64}$"),
    service_name: str = Path(..., pattern=r"^[a-z0-9_\-]{1,64}$"),
    admin: dict = Depends(require_admin),
) -> DeckyServicesResponse:
    # `admin` is not read in the body; declaring it makes the
    # require_admin dependency run for this route.
    try:
        remaining = await remove_service(
            repo,
            decky_kind="fleet",
            decky_name=decky_name,
            service_name=service_name,
        )
    except ServiceMutationError as err:
        # Engine errors become 4xx responses via _map_mutation_error.
        raise _map_mutation_error(err) from err
    else:
        return DeckyServicesResponse(decky_name=decky_name, services=remaining)
# ---------------------------------------------------------- topology
# Topology scope: add one service to a decky addressed through its
# topology, and answer with the post-mutation services list.  Documented
# with comments rather than a docstring because FastAPI would publish a
# docstring as the operation description.
@topology_services_router.post(
    "/{topology_id}/deckies/{decky_name}/services",
    response_model=DeckyServicesResponse,
    responses={
        400: {"description": "Malformed request body"},
        401: {"description": "Could not validate credentials"},
        403: {"description": "Insufficient permissions"},
        404: {"description": "Topology or decky not found"},
        409: {"description": "Service already on decky"},
        422: {"description": "Unknown or fleet_singleton service"},
    },
)
async def api_topology_add_service(
    req: DeckyServiceAddRequest,
    topology_id: str = Path(...),
    decky_name: str = Path(..., pattern=r"^[a-z0-9\-]{1,64}$"),
    admin: dict = Depends(require_admin),
) -> DeckyServicesResponse:
    # `admin` is not read in the body; declaring it makes the
    # require_admin dependency run for this route.
    try:
        updated = await add_service(
            repo,
            decky_kind="topology",
            topology_id=topology_id,
            decky_name=decky_name,
            service_name=req.name,
        )
    except ServiceMutationError as err:
        # Engine errors become 404 / 409 / 422 via _map_mutation_error.
        raise _map_mutation_error(err) from err
    else:
        # The topology id is echoed back so the caller can tell which
        # scope the returned list belongs to.
        return DeckyServicesResponse(
            decky_name=decky_name,
            topology_id=topology_id,
            services=updated,
        )
# Topology scope: remove one service from a decky addressed through its
# topology, and answer with the post-mutation services list.  Documented
# with comments rather than a docstring because FastAPI would publish a
# docstring as the operation description.
@topology_services_router.delete(
    "/{topology_id}/deckies/{decky_name}/services/{service_name}",
    response_model=DeckyServicesResponse,
    responses={
        401: {"description": "Could not validate credentials"},
        403: {"description": "Insufficient permissions"},
        404: {"description": "Topology or decky not found"},
        409: {"description": "Service not on decky"},
    },
)
async def api_topology_remove_service(
    topology_id: str = Path(...),
    decky_name: str = Path(..., pattern=r"^[a-z0-9\-]{1,64}$"),
    service_name: str = Path(..., pattern=r"^[a-z0-9_\-]{1,64}$"),
    admin: dict = Depends(require_admin),
) -> DeckyServicesResponse:
    # `admin` is not read in the body; declaring it makes the
    # require_admin dependency run for this route.
    try:
        remaining = await remove_service(
            repo,
            decky_kind="topology",
            topology_id=topology_id,
            decky_name=decky_name,
            service_name=service_name,
        )
    except ServiceMutationError as err:
        # Engine errors become 4xx responses via _map_mutation_error.
        raise _map_mutation_error(err) from err
    else:
        # The topology id is echoed back so the caller can tell which
        # scope the returned list belongs to.
        return DeckyServicesResponse(
            decky_name=decky_name,
            topology_id=topology_id,
            services=remaining,
        )