From e32fdf9cbf3af8eed11e4d291e20f0e229c0dd3e Mon Sep 17 00:00:00 2001 From: anti Date: Sun, 19 Apr 2026 05:12:55 -0400 Subject: [PATCH] feat(swarm-mgmt): agent_host + updater opt-in; prevent duplicate forwarder spawn --- decnet/cli.py | 11 +++++ .../router/swarm_mgmt/api_enroll_bundle.py | 38 ++++++++++++-- decnet/web/templates/enroll_bootstrap.sh.j2 | 11 +++++ decnet_web/src/components/AgentEnrollment.tsx | 26 +++++++++- decnet_web/src/components/Swarm.css | 6 +++ tests/api/swarm_mgmt/test_enroll_bundle.py | 49 ++++++++++++++++++- tests/live/test_service_isolation_live.py | 7 ++- tests/mysql_spinup.sh | 4 +- 8 files changed, 141 insertions(+), 11 deletions(-) diff --git a/decnet/cli.py b/decnet/cli.py index a3dee5a..feaf2c8 100644 --- a/decnet/cli.py +++ b/decnet/cli.py @@ -86,6 +86,17 @@ def _spawn_detached(argv: list[str], pid_file: Path) -> int: import os import subprocess # nosec B404 + # If the pid_file points at a live process, don't spawn a duplicate — + # agent/swarmctl auto-spawn is called on every startup, and the first + # run's sibling is still alive across restarts. + if pid_file.exists(): + try: + existing = int(pid_file.read_text().strip()) + os.kill(existing, 0) + return existing + except (ValueError, ProcessLookupError, PermissionError, OSError): + pass # stale pid_file — fall through and spawn + with open(os.devnull, "rb") as dn_in, open(os.devnull, "ab") as dn_out: proc = subprocess.Popen( # nosec B603 argv, diff --git a/decnet/web/router/swarm_mgmt/api_enroll_bundle.py b/decnet/web/router/swarm_mgmt/api_enroll_bundle.py index b8f5f0e..218efdb 100644 --- a/decnet/web/router/swarm_mgmt/api_enroll_bundle.py +++ b/decnet/web/router/swarm_mgmt/api_enroll_bundle.py @@ -83,6 +83,12 @@ class EnrollBundleRequest(BaseModel): description="IP/host the agent will reach back to") agent_name: str = Field(..., pattern=r"^[a-z0-9][a-z0-9-]{0,62}$", description="Worker name (DNS-label safe)") + agent_host: str = Field(..., min_length=1, max_length=253, + description="IP/host of the new worker — shown in SwarmHosts and used as cert SAN") + with_updater: bool = Field( + default=True, + description="Include updater cert bundle and auto-start decnet updater on the agent", + ) services_ini: Optional[str] = Field( default=None, description="Optional INI text shipped to the agent as /etc/decnet/services.ini", @@ -190,11 +196,13 @@ def _build_tarball( master_host: str, issued: pki.IssuedCert, services_ini: Optional[str], + updater_issued: Optional[pki.IssuedCert] = None, ) -> bytes: """Gzipped tarball with: - full repo source (minus excludes) - etc/decnet/decnet.ini (pre-baked for mode=agent) - home/.decnet/agent/{ca.crt,worker.crt,worker.key} + - home/.decnet/updater/{ca.crt,updater.crt,updater.key} (if updater_issued) - services.ini at root if provided """ root = _repo_root() @@ -213,6 +221,11 @@ def _build_tarball( _add_bytes(tar, "home/.decnet/agent/worker.crt", issued.cert_pem) _add_bytes(tar, "home/.decnet/agent/worker.key", issued.key_pem, mode=0o600) + if updater_issued is not None: + _add_bytes(tar, "home/.decnet/updater/ca.crt", updater_issued.ca_cert_pem) + _add_bytes(tar, "home/.decnet/updater/updater.crt", updater_issued.cert_pem) + _add_bytes(tar, "home/.decnet/updater/updater.key", updater_issued.key_pem, mode=0o600) + if services_ini: _add_bytes(tar, "services.ini", services_ini.encode()) @@ -224,6 +237,7 @@ def _render_bootstrap( master_host: str, tarball_url: str, expires_at: datetime, + with_updater: bool, ) -> bytes: tpl_path = pathlib.Path(__file__).resolve().parents[1].parent / "templates" / "enroll_bootstrap.sh.j2" tpl = tpl_path.read_text() @@ -234,6 +248,7 @@ def _render_bootstrap( .replace("{{ tarball_url }}", tarball_url) .replace("{{ generated_at }}", now) .replace("{{ expires_at }}", expires_at.replace(microsecond=0).isoformat()) + .replace("{{ with_updater }}", "true" if with_updater else "false") ) return rendered.encode() @@ -262,22 +277,35 @@ async def create_enroll_bundle( # 1. Issue certs (reuses the same code as /swarm/enroll). ca = pki.ensure_ca() - sans = list({req.agent_name, req.master_host}) + sans = list({req.agent_name, req.agent_host, req.master_host}) issued = pki.issue_worker_cert(ca, req.agent_name, sans) bundle_dir = pki.DEFAULT_CA_DIR / "workers" / req.agent_name pki.write_worker_bundle(issued, bundle_dir) + updater_issued: Optional[pki.IssuedCert] = None + updater_fp: Optional[str] = None + if req.with_updater: + updater_cn = f"updater@{req.agent_name}" + updater_sans = list({*sans, updater_cn, "127.0.0.1"}) + updater_issued = pki.issue_worker_cert(ca, updater_cn, updater_sans) + updater_dir = bundle_dir / "updater" + updater_dir.mkdir(parents=True, exist_ok=True) + (updater_dir / "updater.crt").write_bytes(updater_issued.cert_pem) + (updater_dir / "updater.key").write_bytes(updater_issued.key_pem) + os.chmod(updater_dir / "updater.key", 0o600) + updater_fp = updater_issued.fingerprint_sha256 + # 2. Register the host row so it shows up in SwarmHosts immediately. host_uuid = str(_uuid.uuid4()) await repo.add_swarm_host( { "uuid": host_uuid, "name": req.agent_name, - "address": req.master_host, # placeholder; agent overwrites on first heartbeat + "address": req.agent_host, "agent_port": 8765, "status": "enrolled", "client_cert_fingerprint": issued.fingerprint_sha256, - "updater_cert_fingerprint": None, + "updater_cert_fingerprint": updater_fp, "cert_bundle_path": str(bundle_dir), "enrolled_at": datetime.now(timezone.utc), "notes": "enrolled via UI bundle", @@ -285,7 +313,7 @@ async def create_enroll_bundle( ) # 3. Render payload + bootstrap. - tarball = _build_tarball(req.master_host, issued, req.services_ini) + tarball = _build_tarball(req.master_host, issued, req.services_ini, updater_issued) token = secrets.token_urlsafe(24) expires_at = datetime.now(timezone.utc) + BUNDLE_TTL @@ -302,7 +330,7 @@ async def create_enroll_bundle( base = f"{scheme}://{netloc}" tarball_url = f"{base}/api/v1/swarm/enroll-bundle/{token}.tgz" bootstrap_url = f"{base}/api/v1/swarm/enroll-bundle/{token}.sh" - script = _render_bootstrap(req.agent_name, req.master_host, tarball_url, expires_at) + script = _render_bootstrap(req.agent_name, req.master_host, tarball_url, expires_at, req.with_updater) tgz_path.write_bytes(tarball) sh_path.write_bytes(script) diff --git a/decnet/web/templates/enroll_bootstrap.sh.j2 b/decnet/web/templates/enroll_bootstrap.sh.j2 index afa8566..b587b19 100644 --- a/decnet/web/templates/enroll_bootstrap.sh.j2 +++ b/decnet/web/templates/enroll_bootstrap.sh.j2 @@ -38,9 +38,20 @@ for f in ca.crt worker.crt worker.key; do "home/.decnet/agent/$f" "$REAL_HOME/.decnet/agent/$f" done +WITH_UPDATER="{{ with_updater }}" +if [[ "$WITH_UPDATER" == "true" && -d home/.decnet/updater ]]; then + for f in ca.crt updater.crt updater.key; do + install -Dm0600 -o "$REAL_USER" -g "$REAL_USER" \ + "home/.decnet/updater/$f" "$REAL_HOME/.decnet/updater/$f" + done +fi + # Guarantee the pip-installed entrypoint is executable (some setuptools+editable # combos drop it with mode 0644) and expose it on PATH. chmod 0755 "$INSTALL_DIR/.venv/bin/decnet" ln -sf "$INSTALL_DIR/.venv/bin/decnet" /usr/local/bin/decnet sudo -u "$REAL_USER" /usr/local/bin/decnet agent --daemon +if [[ "$WITH_UPDATER" == "true" ]]; then + sudo -u "$REAL_USER" /usr/local/bin/decnet updater --daemon +fi echo "[DECNET] agent {{ agent_name }} enrolled -> {{ master_host }}. Forwarder auto-spawned." diff --git a/decnet_web/src/components/AgentEnrollment.tsx b/decnet_web/src/components/AgentEnrollment.tsx index 05e2a20..0b6677a 100644 --- a/decnet_web/src/components/AgentEnrollment.tsx +++ b/decnet_web/src/components/AgentEnrollment.tsx @@ -14,6 +14,8 @@ interface BundleResult { const AgentEnrollment: React.FC = () => { const [masterHost, setMasterHost] = useState(window.location.hostname); const [agentName, setAgentName] = useState(''); + const [agentHost, setAgentHost] = useState(''); + const [withUpdater, setWithUpdater] = useState(true); const [servicesIni, setServicesIni] = useState(null); const [servicesIniName, setServicesIniName] = useState(null); const [submitting, setSubmitting] = useState(false); @@ -47,6 +49,8 @@ const AgentEnrollment: React.FC = () => { setResult(null); setError(null); setAgentName(''); + setAgentHost(''); + setWithUpdater(true); setServicesIni(null); setServicesIniName(null); setCopied(false); @@ -61,6 +65,8 @@ const AgentEnrollment: React.FC = () => { const res = await api.post('/swarm/enroll-bundle', { master_host: masterHost, agent_name: agentName, + agent_host: agentHost, + with_updater: withUpdater, services_ini: servicesIni, }); setResult(res.data); @@ -106,6 +112,16 @@ const AgentEnrollment: React.FC = () => { required /> + +