fix: clear all addressable technical debt (DEBT-005 through DEBT-025)

Security:
- DEBT-008: remove query-string token auth; header-only Bearer now enforced
- DEBT-013: add regex constraint ^[a-z0-9\-]{1,64}$ on decky_name path param
- DEBT-015: stop leaking raw exception detail to API clients; log server-side
- DEBT-016: validate search (max_length=512) and datetime params with regex

Reliability:
- DEBT-014: wrap SSE event_generator in try/except; yield error frame on failure
- DEBT-017: emit log.warning/error on DB init retry; silent failures now visible

Observability / Docs:
- DEBT-020: add 401/422 response declarations to all route decorators

Infrastructure:
- DEBT-018: add HEALTHCHECK to all 24 template Dockerfiles
- DEBT-019: add USER decnet + setcap cap_net_bind_service to all 24 Dockerfiles
- DEBT-024: bump Redis template version 7.0.12 → 7.2.7

Config:
- DEBT-012: validate DECNET_API_PORT and DECNET_WEB_PORT range (1-65535)

Code quality:
- DEBT-010: delete 22 duplicate decnet_logging.py copies; deployer injects canonical
- DEBT-022: closed as false positive (print only in module docstring)
- DEBT-009: closed as false positive (templates already use structured syslog_line)

Build:
- DEBT-025: generate requirements.lock via pip freeze

Testing:
- DEBT-005/006/007: comprehensive test suite added across tests/api/
- conftest: in-memory SQLite + StaticPool + monkeypatched session_factory
- `fuzz` pytest mark added; the default test run excludes fuzz tests; suite runs in parallel via pytest-xdist `-n logical`

DEBT.md updated: 23/25 items closed; DEBT-011 (Alembic) and DEBT-023 (digest pinning) remain
This commit is contained in:
2026-04-09 19:02:51 -04:00
parent 0166d0d559
commit 016115a523
78 changed files with 527 additions and 5579 deletions

View File

@@ -2,6 +2,7 @@
Deploy, teardown, and status via Docker SDK + subprocess docker compose.
"""
import shutil
import subprocess # nosec B404
import time
from pathlib import Path
@@ -27,6 +28,25 @@ from decnet.network import (
console = Console()
COMPOSE_FILE = Path("decnet-compose.yml")
_CANONICAL_LOGGING = Path(__file__).parent.parent / "templates" / "decnet_logging.py"
def _sync_logging_helper(config: DecnetConfig) -> None:
    """Copy the canonical decnet_logging.py into every active template build context.

    Walks every service referenced by the config's deckies, resolves each
    service's Dockerfile build context, and ensures an up-to-date copy of the
    canonical logging helper is present there. Each context is visited at
    most once per call.
    """
    # Local import — presumably avoids a circular import at module load; confirm.
    from decnet.services.registry import get_service
    # Read the canonical helper once, instead of re-reading it for every destination.
    canonical_bytes = _CANONICAL_LOGGING.read_bytes()
    seen: set[Path] = set()
    for decky in config.deckies:
        for svc_name in decky.services:
            svc = get_service(svc_name)
            if svc is None:
                # Unknown service name — nothing to sync for it.
                continue
            ctx = svc.dockerfile_context()
            if ctx is None or ctx in seen:
                continue
            seen.add(ctx)
            dest = ctx / "decnet_logging.py"
            # Copy only when missing or stale; copy2 preserves file metadata.
            if not dest.exists() or dest.read_bytes() != canonical_bytes:
                shutil.copy2(_CANONICAL_LOGGING, dest)
def _compose(*args: str, compose_file: Path = COMPOSE_FILE) -> None:
@@ -110,6 +130,9 @@ def deploy(config: DecnetConfig, dry_run: bool = False, no_cache: bool = False)
)
setup_host_macvlan(config.interface, host_ip, decky_range)
# --- Sync shared logging helper into each template build context ---
_sync_logging_helper(config)
# --- Compose generation ---
compose_path = write_compose(config, COMPOSE_FILE)
console.print(f"[bold cyan]Compose file written[/] → {compose_path}")

View File

@@ -10,6 +10,17 @@ load_dotenv(_ROOT / ".env.local")
load_dotenv(_ROOT / ".env")
def _port(name: str, default: int) -> int:
raw = os.environ.get(name, str(default))
try:
value = int(raw)
except ValueError:
raise ValueError(f"Environment variable '{name}' must be an integer, got '{raw}'.")
if not (1 <= value <= 65535):
raise ValueError(f"Environment variable '{name}' must be 165535, got {value}.")
return value
def _require_env(name: str) -> str:
"""Return the env var value or raise at startup if it is unset or a known-bad default."""
_KNOWN_BAD = {"fallback-secret-key-change-me", "admin", "secret", "password", "changeme"}
@@ -33,13 +44,13 @@ def _require_env(name: str) -> str:
# API Options
DECNET_API_HOST: str = os.environ.get("DECNET_API_HOST", "0.0.0.0") # nosec B104
DECNET_API_PORT: int = int(os.environ.get("DECNET_API_PORT", "8000"))
DECNET_API_PORT: int = _port("DECNET_API_PORT", 8000)
DECNET_JWT_SECRET: str = _require_env("DECNET_JWT_SECRET")
DECNET_INGEST_LOG_FILE: str | None = os.environ.get("DECNET_INGEST_LOG_FILE", "/var/log/decnet/decnet.log")
# Web Dashboard Options
DECNET_WEB_HOST: str = os.environ.get("DECNET_WEB_HOST", "0.0.0.0") # nosec B104
DECNET_WEB_PORT: int = int(os.environ.get("DECNET_WEB_PORT", "8080"))
DECNET_WEB_PORT: int = _port("DECNET_WEB_PORT", 8080)
DECNET_ADMIN_USER: str = os.environ.get("DECNET_ADMIN_USER", "admin")
DECNET_ADMIN_PASSWORD: str = os.environ.get("DECNET_ADMIN_PASSWORD", "admin")
DECNET_DEVELOPER: bool = os.environ.get("DECNET_DEVELOPER", "False").lower() == "true"

View File

@@ -1,4 +1,5 @@
import asyncio
import logging
from contextlib import asynccontextmanager
from typing import Any, AsyncGenerator, Optional
@@ -10,19 +11,22 @@ from decnet.web.dependencies import repo
from decnet.web.ingester import log_ingestion_worker
from decnet.web.router import api_router
log = logging.getLogger(__name__)
ingestion_task: Optional[asyncio.Task[Any]] = None
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
global ingestion_task
# Retry initialization a few times if DB is locked (common in tests)
for _ in range(5):
for attempt in range(1, 6):
try:
await repo.initialize()
break
except Exception:
except Exception as exc:
log.warning("DB init attempt %d/5 failed: %s", attempt, exc)
if attempt == 5:
log.error("DB failed to initialize after 5 attempts — startup may be degraded")
await asyncio.sleep(0.5)
# Start background ingestion task

View File

@@ -7,11 +7,15 @@ from pathlib import Path
# Sync for initialization (DDL) and async for standard queries
def get_async_engine(db_path: str):
# aiosqlite driver for async access
return create_async_engine(f"sqlite+aiosqlite:///{db_path}", echo=False, connect_args={"uri": True})
# If it's a memory URI, don't add the extra slash that turns it into a relative file
prefix = "sqlite+aiosqlite:///"
if db_path.startswith("file:"):
prefix = "sqlite+aiosqlite:///"
return create_async_engine(f"{prefix}{db_path}", echo=False, connect_args={"uri": True})
def get_sync_engine(db_path: str):
return create_engine(f"sqlite:///{db_path}", echo=False, connect_args={"uri": True})
prefix = "sqlite:///"
return create_engine(f"{prefix}{db_path}", echo=False, connect_args={"uri": True})
def init_db(db_path: str):
"""Synchronously create all tables."""

View File

@@ -25,14 +25,12 @@ async def get_current_user(request: Request) -> str:
headers={"WWW-Authenticate": "Bearer"},
)
# Extract token from header or query param
token: str | None = None
auth_header = request.headers.get("Authorization")
if auth_header and auth_header.startswith("Bearer "):
token = auth_header.split(" ")[1]
elif request.query_params.get("token"):
token = request.query_params.get("token")
token: str | None = (
auth_header.split(" ", 1)[1]
if auth_header and auth_header.startswith("Bearer ")
else None
)
if not token:
raise _credentials_exception

View File

@@ -9,7 +9,11 @@ from decnet.web.db.models import ChangePasswordRequest
router = APIRouter()
@router.post("/auth/change-password", tags=["Authentication"])
@router.post(
"/auth/change-password",
tags=["Authentication"],
responses={401: {"description": "Invalid or expired token / wrong old password"}, 422: {"description": "Validation error"}},
)
async def change_password(request: ChangePasswordRequest, current_user: str = Depends(get_current_user)) -> dict[str, str]:
_user: Optional[dict[str, Any]] = await repo.get_user_by_uuid(current_user)
if not _user or not verify_password(request.old_password, _user["password_hash"]):

View File

@@ -14,7 +14,12 @@ from decnet.web.db.models import LoginRequest, Token
router = APIRouter()
@router.post("/auth/login", response_model=Token, tags=["Authentication"])
@router.post(
"/auth/login",
response_model=Token,
tags=["Authentication"],
responses={401: {"description": "Incorrect username or password"}, 422: {"description": "Validation error"}},
)
async def login(request: LoginRequest) -> dict[str, Any]:
_user: Optional[dict[str, Any]] = await repo.get_user_by_username(request.username)
if not _user or not verify_password(request.password, _user["password_hash"]):

View File

@@ -8,7 +8,8 @@ from decnet.web.db.models import BountyResponse
router = APIRouter()
@router.get("/bounty", response_model=BountyResponse, tags=["Bounty Vault"])
@router.get("/bounty", response_model=BountyResponse, tags=["Bounty Vault"],
responses={401: {"description": "Not authenticated"}, 422: {"description": "Validation error"}},)
async def get_bounties(
limit: int = Query(50, ge=1, le=1000),
offset: int = Query(0, ge=0),

View File

@@ -74,7 +74,7 @@ async def api_deploy_deckies(req: DeployIniRequest, current_user: str = Depends(
try:
_deploy(config)
except Exception as e:
logging.getLogger("decnet.web.api").error(f"Deployment failed: {e}")
raise HTTPException(status_code=500, detail=f"Deployment failed: {e}")
logging.getLogger("decnet.web.api").exception("Deployment failed: %s", e)
raise HTTPException(status_code=500, detail="Deployment failed. Check server logs for details.")
return {"message": "Deckies deployed successfully"}

View File

@@ -7,6 +7,7 @@ from decnet.web.dependencies import get_current_user, repo
router = APIRouter()
@router.get("/deckies", tags=["Fleet Management"])
@router.get("/deckies", tags=["Fleet Management"],
responses={401: {"description": "Not authenticated"}, 422: {"description": "Validation error"}},)
async def get_deckies(current_user: str = Depends(get_current_user)) -> list[dict[str, Any]]:
return await repo.get_deckies()

View File

@@ -1,4 +1,4 @@
from fastapi import APIRouter, Depends, HTTPException
from fastapi import APIRouter, Depends, HTTPException, Path
from decnet.mutator import mutate_decky
from decnet.web.dependencies import get_current_user
@@ -7,7 +7,10 @@ router = APIRouter()
@router.post("/deckies/{decky_name}/mutate", tags=["Fleet Management"])
async def api_mutate_decky(decky_name: str, current_user: str = Depends(get_current_user)) -> dict[str, str]:
async def api_mutate_decky(
decky_name: str = Path(..., pattern=r"^[a-z0-9\-]{1,64}$"),
current_user: str = Depends(get_current_user),
) -> dict[str, str]:
success = mutate_decky(decky_name)
if success:
return {"message": f"Successfully mutated {decky_name}"}

View File

@@ -7,7 +7,8 @@ from decnet.web.db.models import MutateIntervalRequest
router = APIRouter()
@router.put("/deckies/{decky_name}/mutate-interval", tags=["Fleet Management"])
@router.put("/deckies/{decky_name}/mutate-interval", tags=["Fleet Management"],
responses={401: {"description": "Not authenticated"}, 422: {"description": "Validation error"}},)
async def api_update_mutate_interval(decky_name: str, req: MutateIntervalRequest, current_user: str = Depends(get_current_user)) -> dict[str, str]:
state = load_state()
if not state:

View File

@@ -7,7 +7,8 @@ from decnet.web.dependencies import get_current_user, repo
router = APIRouter()
@router.get("/logs/histogram", tags=["Logs"])
@router.get("/logs/histogram", tags=["Logs"],
responses={401: {"description": "Not authenticated"}, 422: {"description": "Validation error"}},)
async def get_logs_histogram(
search: Optional[str] = None,
start_time: Optional[str] = None,

View File

@@ -7,14 +7,16 @@ from decnet.web.db.models import LogsResponse
router = APIRouter()
_DATETIME_RE = r"^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}$"
@router.get("/logs", response_model=LogsResponse, tags=["Logs"])
async def get_logs(
limit: int = Query(50, ge=1, le=1000),
offset: int = Query(0, ge=0),
search: Optional[str] = None,
start_time: Optional[str] = None,
end_time: Optional[str] = None,
search: Optional[str] = Query(None, max_length=512),
start_time: Optional[str] = Query(None, pattern=_DATETIME_RE),
end_time: Optional[str] = Query(None, pattern=_DATETIME_RE),
current_user: str = Depends(get_current_user)
) -> dict[str, Any]:
_logs: list[dict[str, Any]] = await repo.get_logs(limit=limit, offset=offset, search=search, start_time=start_time, end_time=end_time)

View File

@@ -8,6 +8,7 @@ from decnet.web.db.models import StatsResponse
router = APIRouter()
@router.get("/stats", response_model=StatsResponse, tags=["Observability"])
@router.get("/stats", response_model=StatsResponse, tags=["Observability"],
responses={401: {"description": "Not authenticated"}, 422: {"description": "Validation error"}},)
async def get_stats(current_user: str = Depends(get_current_user)) -> dict[str, Any]:
return await repo.get_stats_summary()

View File

@@ -1,5 +1,6 @@
import json
import asyncio
import logging
from typing import AsyncGenerator, Optional
from fastapi import APIRouter, Depends, Query, Request
@@ -7,10 +8,13 @@ from fastapi.responses import StreamingResponse
from decnet.web.dependencies import get_current_user, repo
log = logging.getLogger(__name__)
router = APIRouter()
@router.get("/stream", tags=["Observability"])
@router.get("/stream", tags=["Observability"],
responses={401: {"description": "Not authenticated"}, 422: {"description": "Validation error"}},)
async def stream_events(
request: Request,
last_event_id: int = Query(0, alias="lastEventId"),
@@ -21,43 +25,42 @@ async def stream_events(
) -> StreamingResponse:
async def event_generator() -> AsyncGenerator[str, None]:
# Start tracking from the provided ID, or current max if 0
last_id = last_event_id
if last_id == 0:
last_id = await repo.get_max_log_id()
stats_interval_sec = 10
loops_since_stats = 0
while True:
if await request.is_disconnected():
break
try:
if last_id == 0:
last_id = await repo.get_max_log_id()
# Poll for new logs
new_logs = await repo.get_logs_after_id(last_id, limit=50, search=search, start_time=start_time, end_time=end_time)
if new_logs:
# Update last_id to the max id in the fetched batch
last_id = max(log["id"] for log in new_logs)
payload = json.dumps({"type": "logs", "data": new_logs})
yield f"event: message\ndata: {payload}\n\n"
# If we have new logs, stats probably changed, so force a stats update
loops_since_stats = stats_interval_sec
# Periodically poll for stats
if loops_since_stats >= stats_interval_sec:
stats = await repo.get_stats_summary()
payload = json.dumps({"type": "stats", "data": stats})
yield f"event: message\ndata: {payload}\n\n"
while True:
if await request.is_disconnected():
break
# Also yield histogram
histogram = await repo.get_log_histogram(search=search, start_time=start_time, end_time=end_time, interval_minutes=15)
hist_payload = json.dumps({"type": "histogram", "data": histogram})
yield f"event: message\ndata: {hist_payload}\n\n"
new_logs = await repo.get_logs_after_id(
last_id, limit=50, search=search,
start_time=start_time, end_time=end_time,
)
if new_logs:
last_id = max(entry["id"] for entry in new_logs)
yield f"event: message\ndata: {json.dumps({'type': 'logs', 'data': new_logs})}\n\n"
loops_since_stats = stats_interval_sec
loops_since_stats = 0
loops_since_stats += 1
await asyncio.sleep(1)
if loops_since_stats >= stats_interval_sec:
stats = await repo.get_stats_summary()
yield f"event: message\ndata: {json.dumps({'type': 'stats', 'data': stats})}\n\n"
histogram = await repo.get_log_histogram(
search=search, start_time=start_time,
end_time=end_time, interval_minutes=15,
)
yield f"event: message\ndata: {json.dumps({'type': 'histogram', 'data': histogram})}\n\n"
loops_since_stats = 0
loops_since_stats += 1
await asyncio.sleep(1)
except asyncio.CancelledError:
pass
except Exception:
log.exception("SSE stream error for user %s", last_event_id)
yield f"event: error\ndata: {json.dumps({'type': 'error', 'message': 'Stream interrupted'})}\n\n"
return StreamingResponse(event_generator(), media_type="text/event-stream")