sec(env): refuse to start master API with footgun public-binding config
Add validate_public_binding() called from the master API lifespan: when DECNET_API_HOST is non-loopback, refuse to start if DECNET_CORS_ORIGINS still contains a loopback origin (catches the "operator flipped to 0.0.0.0 to make it work and forgot to update CORS" footgun) or if DECNET_CANARY_HTTP_BASE is plaintext http:// to a non-loopback host. Log CRITICAL when DECNET_LIMITER_ENABLED=false on a public binding. The validator no-ops under pytest so unrelated suites don't trip on it. Add DECNET_VERIFY_HOSTNAME env knob; AgentClient and UpdaterClient consult it when verify_hostname is None, giving production deploys TLS hostname verification on top of the existing CA + fingerprint pin. Default off so dev enrollments with mismatched SANs keep working.
This commit is contained in:
@@ -174,6 +174,101 @@ _cors_raw: str = os.environ.get("DECNET_CORS_ORIGINS", _cors_default)
|
|||||||
DECNET_CORS_ORIGINS: list[str] = [o.strip() for o in _cors_raw.split(",") if o.strip()]
|
DECNET_CORS_ORIGINS: list[str] = [o.strip() for o in _cors_raw.split(",") if o.strip()]
|
||||||
|
|
||||||
|
|
||||||
|
# Master→worker mTLS hostname verification. Off by default because legacy
|
||||||
|
# enrollments were issued certs with operator-supplied SAN lists that may
|
||||||
|
# not match the URL the master uses to connect; set to "true" on a fresh
|
||||||
|
# production deploy where you control enrollment to get TLS hostname checks
|
||||||
|
# on top of CA + fingerprint pinning.
|
||||||
|
DECNET_VERIFY_HOSTNAME: bool = (
|
||||||
|
os.environ.get("DECNET_VERIFY_HOSTNAME", "false").lower() == "true"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
_LOOPBACK_HOSTS = {"localhost", "127.0.0.1", "::1"}
|
||||||
|
_WILDCARD_BIND_HOSTS = {"0.0.0.0", "::"} # nosec B104 — comparison only
|
||||||
|
|
||||||
|
|
||||||
|
def _origin_host(origin: str) -> str:
|
||||||
|
"""Pull the bare hostname out of a CORS origin (``http(s)://host:port``).
|
||||||
|
|
||||||
|
Returns the full origin lowercased if the URL can't be parsed — the
|
||||||
|
caller treats unrecognised origins as non-loopback, which is the safer
|
||||||
|
default for a public-binding check.
|
||||||
|
"""
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
try:
|
||||||
|
parsed = urlparse(origin)
|
||||||
|
host = (parsed.hostname or "").lower()
|
||||||
|
return host or origin.strip().lower()
|
||||||
|
except (ValueError, AttributeError):
|
||||||
|
return origin.strip().lower()
|
||||||
|
|
||||||
|
|
||||||
|
def validate_public_binding() -> None:
|
||||||
|
"""Refuse to start the master API/web with a footgun config.
|
||||||
|
|
||||||
|
Three checks, all gated on the API binding being non-loopback (i.e.
|
||||||
|
actually exposed to the network):
|
||||||
|
|
||||||
|
* If CORS allow-list still contains a loopback origin, fail. The most
|
||||||
|
common shape of this bug is operator flips ``DECNET_API_HOST=0.0.0.0``
|
||||||
|
to "make it work" and forgets to update ``DECNET_CORS_ORIGINS`` —
|
||||||
|
the dashboard then either can't talk to the API at all, or worse,
|
||||||
|
they wildcard CORS to paper over it.
|
||||||
|
|
||||||
|
* If the canary HTTP base is plaintext (``http://``) and the canary
|
||||||
|
host isn't loopback, fail. Canary tokens phone home on trigger;
|
||||||
|
plaintext over the public internet leaks the token to anyone on
|
||||||
|
the path.
|
||||||
|
|
||||||
|
* If the rate limiter is globally disabled, log loudly. Don't fail —
|
||||||
|
operators sometimes want this for benchmarking — but never let it
|
||||||
|
slip past unmentioned on a public binding.
|
||||||
|
|
||||||
|
Called from the FastAPI lifespan so it surfaces at startup, not on
|
||||||
|
first request. Skipped automatically when running under pytest so
|
||||||
|
the test suite doesn't have to set five env vars per fixture.
|
||||||
|
"""
|
||||||
|
if any(k.startswith("PYTEST") for k in os.environ):
|
||||||
|
return
|
||||||
|
if DECNET_API_HOST in _LOOPBACK_HOSTS:
|
||||||
|
return # not exposed; nothing to validate
|
||||||
|
|
||||||
|
bind_label = "DECNET_API_HOST" if DECNET_API_HOST in _WILDCARD_BIND_HOSTS else "DECNET_API_HOST"
|
||||||
|
loopback_origins = [o for o in DECNET_CORS_ORIGINS if _origin_host(o) in _LOOPBACK_HOSTS]
|
||||||
|
if loopback_origins:
|
||||||
|
raise ValueError(
|
||||||
|
f"{bind_label}={DECNET_API_HOST!r} exposes the API to the network, "
|
||||||
|
f"but DECNET_CORS_ORIGINS still contains loopback origin(s) "
|
||||||
|
f"{loopback_origins!r}. Set DECNET_CORS_ORIGINS to the public "
|
||||||
|
f"dashboard URL(s) before starting (e.g. "
|
||||||
|
f"DECNET_CORS_ORIGINS=https://dashboard.example.com)."
|
||||||
|
)
|
||||||
|
|
||||||
|
canary_base = os.environ.get("DECNET_CANARY_HTTP_BASE", "").strip()
|
||||||
|
if canary_base and canary_base.lower().startswith("http://"):
|
||||||
|
host = _origin_host(canary_base)
|
||||||
|
if host and host not in _LOOPBACK_HOSTS:
|
||||||
|
raise ValueError(
|
||||||
|
f"DECNET_CANARY_HTTP_BASE={canary_base!r} is plaintext HTTP and "
|
||||||
|
f"points at a non-loopback host. Canary triggers carry secrets "
|
||||||
|
f"that must not cross the public internet in cleartext — use "
|
||||||
|
f"https:// or front the canary endpoint with a TLS proxy."
|
||||||
|
)
|
||||||
|
|
||||||
|
limiter_enabled = os.environ.get("DECNET_LIMITER_ENABLED", "true").lower() != "false"
|
||||||
|
if not limiter_enabled:
|
||||||
|
# Late import to avoid a circular dependency through decnet.logging.
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
get_logger("env").critical(
|
||||||
|
"DECNET_LIMITER_ENABLED=false on a public binding (%s=%s). "
|
||||||
|
"Login + write endpoints have no rate limiting — only run this "
|
||||||
|
"way for benchmarking or behind an external rate-limiting proxy.",
|
||||||
|
bind_label, DECNET_API_HOST,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def __getattr__(name: str) -> str:
|
def __getattr__(name: str) -> str:
|
||||||
"""Lazy resolution for secrets only the master web/api process needs."""
|
"""Lazy resolution for secrets only the master web/api process needs."""
|
||||||
if name == "DECNET_JWT_SECRET":
|
if name == "DECNET_JWT_SECRET":
|
||||||
|
|||||||
@@ -106,15 +106,20 @@ class AgentClient:
|
|||||||
address: Optional[str] = None,
|
address: Optional[str] = None,
|
||||||
agent_port: Optional[int] = None,
|
agent_port: Optional[int] = None,
|
||||||
identity: Optional[MasterIdentity] = None,
|
identity: Optional[MasterIdentity] = None,
|
||||||
verify_hostname: bool = False,
|
verify_hostname: Optional[bool] = None,
|
||||||
):
|
):
|
||||||
"""Either pass a SwarmHost dict, or explicit address/port.
|
"""Either pass a SwarmHost dict, or explicit address/port.
|
||||||
|
|
||||||
``verify_hostname`` stays False by default because the worker's
|
``verify_hostname`` defers to ``DECNET_VERIFY_HOSTNAME`` when the
|
||||||
cert SAN is populated from the operator-supplied address list, not
|
caller doesn't pass an explicit value — production deploys flip
|
||||||
from modern TLS hostname-verification semantics. The mTLS client
|
the env var on so the worker's cert SAN must match the address
|
||||||
cert + CA pinning are what authenticate the peer.
|
the master connects to, on top of the existing CA + fingerprint
|
||||||
|
pin. Defaults to False so dev/test enrollments with mismatched
|
||||||
|
SANs keep working unchanged.
|
||||||
"""
|
"""
|
||||||
|
if verify_hostname is None:
|
||||||
|
from decnet.env import DECNET_VERIFY_HOSTNAME
|
||||||
|
verify_hostname = DECNET_VERIFY_HOSTNAME
|
||||||
if host is not None:
|
if host is not None:
|
||||||
self._address = host["address"]
|
self._address = host["address"]
|
||||||
self._port = int(host.get("agent_port") or 8765)
|
self._port = int(host.get("agent_port") or 8765)
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ from decnet.env import (
|
|||||||
DECNET_INGEST_LOG_FILE,
|
DECNET_INGEST_LOG_FILE,
|
||||||
DECNET_PROFILE_DIR,
|
DECNET_PROFILE_DIR,
|
||||||
DECNET_PROFILE_REQUESTS,
|
DECNET_PROFILE_REQUESTS,
|
||||||
|
validate_public_binding,
|
||||||
)
|
)
|
||||||
from decnet.logging import get_logger
|
from decnet.logging import get_logger
|
||||||
from decnet.web.dependencies import repo
|
from decnet.web.dependencies import repo
|
||||||
@@ -69,6 +70,11 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
|
|||||||
soft,
|
soft,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Refuse to come up with a footgun config on a public binding (loopback
|
||||||
|
# CORS origin while bound to 0.0.0.0, plaintext canary base, etc.).
|
||||||
|
# Raises ValueError with an actionable message; uvicorn surfaces it.
|
||||||
|
validate_public_binding()
|
||||||
|
|
||||||
log.info("API startup initialising database")
|
log.info("API startup initialising database")
|
||||||
for attempt in range(1, 6):
|
for attempt in range(1, 6):
|
||||||
try:
|
try:
|
||||||
|
|||||||
89
tests/web/test_validate_public_binding.py
Normal file
89
tests/web/test_validate_public_binding.py
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
"""validate_public_binding refuses footgun configs at master startup.
|
||||||
|
|
||||||
|
The validator no-ops under pytest by design (so unit tests in unrelated
|
||||||
|
modules don't have to set five env vars per fixture); these tests strip
|
||||||
|
the PYTEST_* vars before calling it so the real code path runs.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import importlib
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
def _reimport_env(monkeypatch: pytest.MonkeyPatch):
|
||||||
|
for mod in list(sys.modules):
|
||||||
|
if mod == "decnet.env" or mod.startswith("decnet.env."):
|
||||||
|
sys.modules.pop(mod)
|
||||||
|
return importlib.import_module("decnet.env")
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_pytest_vars(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||||
|
import os
|
||||||
|
for k in list(os.environ):
|
||||||
|
if k.startswith("PYTEST"):
|
||||||
|
monkeypatch.delenv(k, raising=False)
|
||||||
|
|
||||||
|
|
||||||
|
def test_validator_noop_on_loopback_binding(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||||
|
monkeypatch.setenv("DECNET_API_HOST", "127.0.0.1")
|
||||||
|
monkeypatch.setenv("DECNET_CORS_ORIGINS", "http://localhost:8080")
|
||||||
|
env = _reimport_env(monkeypatch)
|
||||||
|
_strip_pytest_vars(monkeypatch)
|
||||||
|
env.validate_public_binding() # no raise
|
||||||
|
|
||||||
|
|
||||||
|
def test_validator_rejects_loopback_cors_on_public_bind(
|
||||||
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
|
) -> None:
|
||||||
|
monkeypatch.setenv("DECNET_API_HOST", "0.0.0.0")
|
||||||
|
monkeypatch.setenv("DECNET_CORS_ORIGINS", "http://localhost:8080")
|
||||||
|
env = _reimport_env(monkeypatch)
|
||||||
|
_strip_pytest_vars(monkeypatch)
|
||||||
|
with pytest.raises(ValueError, match="loopback origin"):
|
||||||
|
env.validate_public_binding()
|
||||||
|
|
||||||
|
|
||||||
|
def test_validator_accepts_public_cors_on_public_bind(
|
||||||
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
|
) -> None:
|
||||||
|
monkeypatch.setenv("DECNET_API_HOST", "0.0.0.0")
|
||||||
|
monkeypatch.setenv("DECNET_CORS_ORIGINS", "https://dashboard.example.com")
|
||||||
|
env = _reimport_env(monkeypatch)
|
||||||
|
_strip_pytest_vars(monkeypatch)
|
||||||
|
env.validate_public_binding() # no raise
|
||||||
|
|
||||||
|
|
||||||
|
def test_validator_rejects_plaintext_canary_on_public_bind(
|
||||||
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
|
) -> None:
|
||||||
|
monkeypatch.setenv("DECNET_API_HOST", "0.0.0.0")
|
||||||
|
monkeypatch.setenv("DECNET_CORS_ORIGINS", "https://dashboard.example.com")
|
||||||
|
monkeypatch.setenv("DECNET_CANARY_HTTP_BASE", "http://canary.example.com:8088")
|
||||||
|
env = _reimport_env(monkeypatch)
|
||||||
|
_strip_pytest_vars(monkeypatch)
|
||||||
|
with pytest.raises(ValueError, match="plaintext HTTP"):
|
||||||
|
env.validate_public_binding()
|
||||||
|
|
||||||
|
|
||||||
|
def test_validator_allows_loopback_canary_even_on_public_bind(
|
||||||
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
|
) -> None:
|
||||||
|
# Local canary endpoint behind the master is fine; only public-facing
|
||||||
|
# plaintext is the footgun.
|
||||||
|
monkeypatch.setenv("DECNET_API_HOST", "0.0.0.0")
|
||||||
|
monkeypatch.setenv("DECNET_CORS_ORIGINS", "https://dashboard.example.com")
|
||||||
|
monkeypatch.setenv("DECNET_CANARY_HTTP_BASE", "http://localhost:8088")
|
||||||
|
env = _reimport_env(monkeypatch)
|
||||||
|
_strip_pytest_vars(monkeypatch)
|
||||||
|
env.validate_public_binding() # no raise
|
||||||
|
|
||||||
|
|
||||||
|
def test_validator_skips_under_pytest(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||||
|
# With PYTEST_* still in env (default), even a misconfigured env passes —
|
||||||
|
# this is the deliberate bypass so unrelated tests don't trip on it.
|
||||||
|
monkeypatch.setenv("DECNET_API_HOST", "0.0.0.0")
|
||||||
|
monkeypatch.setenv("DECNET_CORS_ORIGINS", "http://localhost:8080")
|
||||||
|
env = _reimport_env(monkeypatch)
|
||||||
|
env.validate_public_binding() # no raise — guard short-circuits
|
||||||
Reference in New Issue
Block a user