sec(env): refuse to start master API with footgun public-binding config
Add validate_public_binding() called from the master API lifespan: when DECNET_API_HOST is non-loopback, refuse to start if DECNET_CORS_ORIGINS still contains a loopback origin (catches the "operator flipped to 0.0.0.0 to make it work and forgot to update CORS" footgun) or if DECNET_CANARY_HTTP_BASE is plaintext http:// to a non-loopback host. Log CRITICAL when DECNET_LIMITER_ENABLED=false on a public binding. The validator no-ops under pytest so unrelated suites don't trip on it. Add DECNET_VERIFY_HOSTNAME env knob; AgentClient and UpdaterClient consult it when verify_hostname is None, giving production deploys TLS hostname verification on top of the existing CA + fingerprint pin. Default off so dev enrollments with mismatched SANs keep working.
This commit is contained in:
@@ -174,6 +174,101 @@ _cors_raw: str = os.environ.get("DECNET_CORS_ORIGINS", _cors_default)
|
||||
DECNET_CORS_ORIGINS: list[str] = [o.strip() for o in _cors_raw.split(",") if o.strip()]
|
||||
|
||||
|
||||
# Master→worker mTLS hostname verification. Off by default because legacy
|
||||
# enrollments were issued certs with operator-supplied SAN lists that may
|
||||
# not match the URL the master uses to connect; set to "true" on a fresh
|
||||
# production deploy where you control enrollment to get TLS hostname checks
|
||||
# on top of CA + fingerprint pinning.
|
||||
DECNET_VERIFY_HOSTNAME: bool = (
|
||||
os.environ.get("DECNET_VERIFY_HOSTNAME", "false").lower() == "true"
|
||||
)
|
||||
|
||||
|
||||
_LOOPBACK_HOSTS = {"localhost", "127.0.0.1", "::1"}
|
||||
_WILDCARD_BIND_HOSTS = {"0.0.0.0", "::"} # nosec B104 — comparison only
|
||||
|
||||
|
||||
def _origin_host(origin: str) -> str:
|
||||
"""Pull the bare hostname out of a CORS origin (``http(s)://host:port``).
|
||||
|
||||
Returns the full origin lowercased if the URL can't be parsed — the
|
||||
caller treats unrecognised origins as non-loopback, which is the safer
|
||||
default for a public-binding check.
|
||||
"""
|
||||
from urllib.parse import urlparse
|
||||
|
||||
try:
|
||||
parsed = urlparse(origin)
|
||||
host = (parsed.hostname or "").lower()
|
||||
return host or origin.strip().lower()
|
||||
except (ValueError, AttributeError):
|
||||
return origin.strip().lower()
|
||||
|
||||
|
||||
def validate_public_binding() -> None:
|
||||
"""Refuse to start the master API/web with a footgun config.
|
||||
|
||||
Three checks, all gated on the API binding being non-loopback (i.e.
|
||||
actually exposed to the network):
|
||||
|
||||
* If CORS allow-list still contains a loopback origin, fail. The most
|
||||
common shape of this bug is operator flips ``DECNET_API_HOST=0.0.0.0``
|
||||
to "make it work" and forgets to update ``DECNET_CORS_ORIGINS`` —
|
||||
the dashboard then either can't talk to the API at all, or worse,
|
||||
they wildcard CORS to paper over it.
|
||||
|
||||
* If the canary HTTP base is plaintext (``http://``) and the canary
|
||||
host isn't loopback, fail. Canary tokens phone home on trigger;
|
||||
plaintext over the public internet leaks the token to anyone on
|
||||
the path.
|
||||
|
||||
* If the rate limiter is globally disabled, log loudly. Don't fail —
|
||||
operators sometimes want this for benchmarking — but never let it
|
||||
slip past unmentioned on a public binding.
|
||||
|
||||
Called from the FastAPI lifespan so it surfaces at startup, not on
|
||||
first request. Skipped automatically when running under pytest so
|
||||
the test suite doesn't have to set five env vars per fixture.
|
||||
"""
|
||||
if any(k.startswith("PYTEST") for k in os.environ):
|
||||
return
|
||||
if DECNET_API_HOST in _LOOPBACK_HOSTS:
|
||||
return # not exposed; nothing to validate
|
||||
|
||||
bind_label = "DECNET_API_HOST" if DECNET_API_HOST in _WILDCARD_BIND_HOSTS else "DECNET_API_HOST"
|
||||
loopback_origins = [o for o in DECNET_CORS_ORIGINS if _origin_host(o) in _LOOPBACK_HOSTS]
|
||||
if loopback_origins:
|
||||
raise ValueError(
|
||||
f"{bind_label}={DECNET_API_HOST!r} exposes the API to the network, "
|
||||
f"but DECNET_CORS_ORIGINS still contains loopback origin(s) "
|
||||
f"{loopback_origins!r}. Set DECNET_CORS_ORIGINS to the public "
|
||||
f"dashboard URL(s) before starting (e.g. "
|
||||
f"DECNET_CORS_ORIGINS=https://dashboard.example.com)."
|
||||
)
|
||||
|
||||
canary_base = os.environ.get("DECNET_CANARY_HTTP_BASE", "").strip()
|
||||
if canary_base and canary_base.lower().startswith("http://"):
|
||||
host = _origin_host(canary_base)
|
||||
if host and host not in _LOOPBACK_HOSTS:
|
||||
raise ValueError(
|
||||
f"DECNET_CANARY_HTTP_BASE={canary_base!r} is plaintext HTTP and "
|
||||
f"points at a non-loopback host. Canary triggers carry secrets "
|
||||
f"that must not cross the public internet in cleartext — use "
|
||||
f"https:// or front the canary endpoint with a TLS proxy."
|
||||
)
|
||||
|
||||
limiter_enabled = os.environ.get("DECNET_LIMITER_ENABLED", "true").lower() != "false"
|
||||
if not limiter_enabled:
|
||||
# Late import to avoid a circular dependency through decnet.logging.
|
||||
from decnet.logging import get_logger
|
||||
get_logger("env").critical(
|
||||
"DECNET_LIMITER_ENABLED=false on a public binding (%s=%s). "
|
||||
"Login + write endpoints have no rate limiting — only run this "
|
||||
"way for benchmarking or behind an external rate-limiting proxy.",
|
||||
bind_label, DECNET_API_HOST,
|
||||
)
|
||||
|
||||
|
||||
def __getattr__(name: str) -> str:
|
||||
"""Lazy resolution for secrets only the master web/api process needs."""
|
||||
if name == "DECNET_JWT_SECRET":
|
||||
|
||||
@@ -106,15 +106,20 @@ class AgentClient:
|
||||
address: Optional[str] = None,
|
||||
agent_port: Optional[int] = None,
|
||||
identity: Optional[MasterIdentity] = None,
|
||||
verify_hostname: bool = False,
|
||||
verify_hostname: Optional[bool] = None,
|
||||
):
|
||||
"""Either pass a SwarmHost dict, or explicit address/port.
|
||||
|
||||
``verify_hostname`` stays False by default because the worker's
|
||||
cert SAN is populated from the operator-supplied address list, not
|
||||
from modern TLS hostname-verification semantics. The mTLS client
|
||||
cert + CA pinning are what authenticate the peer.
|
||||
``verify_hostname`` defers to ``DECNET_VERIFY_HOSTNAME`` when the
|
||||
caller doesn't pass an explicit value — production deploys flip
|
||||
the env var on so the worker's cert SAN must match the address
|
||||
the master connects to, on top of the existing CA + fingerprint
|
||||
pin. Defaults to False so dev/test enrollments with mismatched
|
||||
SANs keep working unchanged.
|
||||
"""
|
||||
if verify_hostname is None:
|
||||
from decnet.env import DECNET_VERIFY_HOSTNAME
|
||||
verify_hostname = DECNET_VERIFY_HOSTNAME
|
||||
if host is not None:
|
||||
self._address = host["address"]
|
||||
self._port = int(host.get("agent_port") or 8765)
|
||||
|
||||
@@ -24,6 +24,7 @@ from decnet.env import (
|
||||
DECNET_INGEST_LOG_FILE,
|
||||
DECNET_PROFILE_DIR,
|
||||
DECNET_PROFILE_REQUESTS,
|
||||
validate_public_binding,
|
||||
)
|
||||
from decnet.logging import get_logger
|
||||
from decnet.web.dependencies import repo
|
||||
@@ -69,6 +70,11 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
|
||||
soft,
|
||||
)
|
||||
|
||||
# Refuse to come up with a footgun config on a public binding (loopback
|
||||
# CORS origin while bound to 0.0.0.0, plaintext canary base, etc.).
|
||||
# Raises ValueError with an actionable message; uvicorn surfaces it.
|
||||
validate_public_binding()
|
||||
|
||||
log.info("API startup initialising database")
|
||||
for attempt in range(1, 6):
|
||||
try:
|
||||
|
||||
89
tests/web/test_validate_public_binding.py
Normal file
89
tests/web/test_validate_public_binding.py
Normal file
@@ -0,0 +1,89 @@
|
||||
"""validate_public_binding refuses footgun configs at master startup.
|
||||
|
||||
The validator no-ops under pytest by design (so unit tests in unrelated
|
||||
modules don't have to set five env vars per fixture); these tests strip
|
||||
the PYTEST_* vars before calling it so the real code path runs.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def _reimport_env(monkeypatch: pytest.MonkeyPatch):
|
||||
for mod in list(sys.modules):
|
||||
if mod == "decnet.env" or mod.startswith("decnet.env."):
|
||||
sys.modules.pop(mod)
|
||||
return importlib.import_module("decnet.env")
|
||||
|
||||
|
||||
def _strip_pytest_vars(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
import os
|
||||
for k in list(os.environ):
|
||||
if k.startswith("PYTEST"):
|
||||
monkeypatch.delenv(k, raising=False)
|
||||
|
||||
|
||||
def test_validator_noop_on_loopback_binding(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
monkeypatch.setenv("DECNET_API_HOST", "127.0.0.1")
|
||||
monkeypatch.setenv("DECNET_CORS_ORIGINS", "http://localhost:8080")
|
||||
env = _reimport_env(monkeypatch)
|
||||
_strip_pytest_vars(monkeypatch)
|
||||
env.validate_public_binding() # no raise
|
||||
|
||||
|
||||
def test_validator_rejects_loopback_cors_on_public_bind(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
monkeypatch.setenv("DECNET_API_HOST", "0.0.0.0")
|
||||
monkeypatch.setenv("DECNET_CORS_ORIGINS", "http://localhost:8080")
|
||||
env = _reimport_env(monkeypatch)
|
||||
_strip_pytest_vars(monkeypatch)
|
||||
with pytest.raises(ValueError, match="loopback origin"):
|
||||
env.validate_public_binding()
|
||||
|
||||
|
||||
def test_validator_accepts_public_cors_on_public_bind(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
monkeypatch.setenv("DECNET_API_HOST", "0.0.0.0")
|
||||
monkeypatch.setenv("DECNET_CORS_ORIGINS", "https://dashboard.example.com")
|
||||
env = _reimport_env(monkeypatch)
|
||||
_strip_pytest_vars(monkeypatch)
|
||||
env.validate_public_binding() # no raise
|
||||
|
||||
|
||||
def test_validator_rejects_plaintext_canary_on_public_bind(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
monkeypatch.setenv("DECNET_API_HOST", "0.0.0.0")
|
||||
monkeypatch.setenv("DECNET_CORS_ORIGINS", "https://dashboard.example.com")
|
||||
monkeypatch.setenv("DECNET_CANARY_HTTP_BASE", "http://canary.example.com:8088")
|
||||
env = _reimport_env(monkeypatch)
|
||||
_strip_pytest_vars(monkeypatch)
|
||||
with pytest.raises(ValueError, match="plaintext HTTP"):
|
||||
env.validate_public_binding()
|
||||
|
||||
|
||||
def test_validator_allows_loopback_canary_even_on_public_bind(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
# Local canary endpoint behind the master is fine; only public-facing
|
||||
# plaintext is the footgun.
|
||||
monkeypatch.setenv("DECNET_API_HOST", "0.0.0.0")
|
||||
monkeypatch.setenv("DECNET_CORS_ORIGINS", "https://dashboard.example.com")
|
||||
monkeypatch.setenv("DECNET_CANARY_HTTP_BASE", "http://localhost:8088")
|
||||
env = _reimport_env(monkeypatch)
|
||||
_strip_pytest_vars(monkeypatch)
|
||||
env.validate_public_binding() # no raise
|
||||
|
||||
|
||||
def test_validator_skips_under_pytest(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
# With PYTEST_* still in env (default), even a misconfigured env passes —
|
||||
# this is the deliberate bypass so unrelated tests don't trip on it.
|
||||
monkeypatch.setenv("DECNET_API_HOST", "0.0.0.0")
|
||||
monkeypatch.setenv("DECNET_CORS_ORIGINS", "http://localhost:8080")
|
||||
env = _reimport_env(monkeypatch)
|
||||
env.validate_public_binding() # no raise — guard short-circuits
|
||||
Reference in New Issue
Block a user