Files
DECNET/decnet/web/router/swarm/api_decommission_host.py
anti 8d18c59201 fix(swarm): require admin JWT on all swarm operator endpoints
Gate all 8 swarm-controller operator routes (enroll, list/get/decommission
hosts, deploy, teardown, check, list deckies) with the centralized
require_admin RBAC dependency alongside require_operator_cert; mTLS becomes
defense-in-depth instead of the only gate. /heartbeat stays cert-fingerprint
pinned (worker-facing) and /swarm/health stays open (liveness only).

CLI swarm commands now send Authorization: Bearer $DECNET_API_TOKEN with a
401/403 hint covering the must_change_password bootstrap flow.

Bump pyjwt to 2.13.0 and pip to 26.1.2 (pip-audit PYSEC-2026-175/177/178/179,
PYSEC-2026-196); authz suite re-verified on the new pyjwt.

Closes ASVS_L2_AUDIT.md V4.1.1a and V4.1.1b (CRITICAL).
2026-06-09 17:08:10 -04:00

72 lines
2.3 KiB
Python

# SPDX-License-Identifier: AGPL-3.0-or-later
"""DELETE /swarm/hosts/{uuid} — decommission a worker.
Removes the DeckyShard rows bound to the host (portable cascade — MySQL
and SQLite both honor it via the repo layer), deletes the SwarmHost row,
and best-effort-cleans the per-worker bundle directory on the master.
Also asks the worker agent to wipe its own install (keeping logs). A
dead/unreachable worker does not block master-side cleanup.
"""
from __future__ import annotations
import pathlib
from fastapi import APIRouter, Depends, HTTPException, status
from decnet.logging import get_logger
from decnet.swarm.client import AgentClient
from decnet.web.db.repository import BaseRepository
from decnet.web.dependencies import get_repo, require_admin
from decnet.web.router.swarm._mtls import PeerCert, require_operator_cert
# Module-level logger; obtained under the "swarm.decommission" name.
log = get_logger("swarm.decommission")
# Router on which the DELETE /hosts/{uuid} route below is registered.
router = APIRouter()
def _remove_bundle_dir(bundle_path: str | None) -> None:
    """Best-effort removal of a per-worker bundle directory.

    Unlinks every direct child of the directory and then removes the
    directory itself. Filesystem errors are logged and never raised, so a
    failed cleanup cannot turn an otherwise successful decommission into
    an error response.

    Args:
        bundle_path: Value of the host row's ``cert_bundle_path`` field
            (presumably a filesystem path string — confirm against the
            repository schema). ``None`` or an empty value means there is
            nothing to clean up.
    """
    if not bundle_path:
        # pathlib.Path("") resolves to the current working directory, so an
        # empty/NULL path must never reach the deletion loop below.
        return

    bundle_dir = pathlib.Path(bundle_path)
    try:
        if not bundle_dir.is_dir():
            # Directory was moved or removed manually; nothing to do.
            return
        children = list(bundle_dir.iterdir())
    except OSError as exc:
        log.warning(
            "decommission: cannot list bundle dir %s: %s", bundle_dir, exc
        )
        return

    for child in children:
        try:
            child.unlink()
        except OSError as exc:
            # Includes sub-directories, which unlink() cannot remove.
            log.warning(
                "decommission: cannot remove bundle file %s: %s", child, exc
            )

    try:
        bundle_dir.rmdir()
    except OSError as exc:
        # Typically means some child could not be removed above.
        log.warning(
            "decommission: cannot remove bundle dir %s: %s", bundle_dir, exc
        )


@router.delete(
    "/hosts/{uuid}",
    status_code=status.HTTP_204_NO_CONTENT,
    tags=["Swarm Hosts"],
    responses={
        401: {"description": "Missing or invalid admin JWT"},
        403: {"description": "Authenticated user is not an admin, or operator cert missing"},
        404: {"description": "No host with this UUID is enrolled"},
    },
)
async def api_decommission_host(
    uuid: str,
    repo: BaseRepository = Depends(get_repo),
    _admin: dict = Depends(require_admin),
    _operator: PeerCert = Depends(require_operator_cert),
) -> None:
    """Decommission the swarm host identified by ``uuid``.

    Steps, in order:

    1. Look up the host row; respond 404 if it does not exist.
    2. Ask the worker agent to self-destruct. Any failure here is logged
       and ignored so that an unreachable worker does not block the
       master-side cleanup.
    3. Delete the host's decky shards, then the host row itself.
    4. Best-effort removal of the host's bundle directory on disk.

    Args:
        uuid: UUID of the host, taken from the URL path.
        repo: Repository used for the lookup and the deletions.
        _admin: Result of the ``require_admin`` dependency; unused beyond
            gating the route.
        _operator: Result of the ``require_operator_cert`` dependency;
            unused beyond gating the route.

    Raises:
        HTTPException: 404 when no host row exists for ``uuid``.
    """
    row = await repo.get_swarm_host_by_uuid(uuid)
    if row is None:
        raise HTTPException(status_code=404, detail="host not found")

    try:
        async with AgentClient(host=row) as agent:
            await agent.self_destruct()
    except Exception:
        # Deliberately broad: whatever goes wrong while contacting the
        # worker, the master must still forget the host.
        log.exception(
            "decommission: self-destruct dispatch failed host=%s — "
            "proceeding with master-side cleanup anyway",
            row.get("name"),
        )

    await repo.delete_decky_shards_for_host(uuid)
    await repo.delete_swarm_host(uuid)

    # Best-effort bundle cleanup; if the dir was moved manually, don't fail.
    _remove_bundle_dir(row.get("cert_bundle_path"))