feat(1.1): supervise cpu group with ProcessPoolExecutor kernel offload
Hosts clusterer/campaign-clusterer/attribution/reuse-correlate in one process. The two O(n^2) connected-components kernels (cluster_observations, cluster_identities) offload to ONE shared forkserver pool via decnet.offload.run_kernel, so they run in parallel instead of serialising under the GIL. - offload.run_kernel: pool when installed + offload_if holds, else inline. Standalone workers and all tests run inline => behaviour unchanged (424 clustering/correlation tests green). - offload_if gates on input size (>=256) to skip pickle cost on small passes. - forkserver (not fork): supervisor is multithreaded via bus clients. - attribution/reuse co-located but not offloaded yet (lighter; same run_kernel path extends to them if profiling shows contention). - systemd unit Conflicts= the 4 units it replaces; no docker/raw-socket priv.
This commit is contained in:
47
deploy/decnet-supervise-cpu.service.j2
Normal file
47
deploy/decnet-supervise-cpu.service.j2
Normal file
@@ -0,0 +1,47 @@
|
||||
# [Unit]: identity, ordering and mutual-exclusion rules for the CPU supervisor.
[Unit]
|
||||
Description=DECNET CPU Supervisor (clusterer + campaign-clusterer + attribution + reuse-correlate in one process, kernels offloaded to a shared pool)
|
||||
Documentation=https://git.resacachile.cl/anti/DECNET/wiki/Workers#supervisor
|
||||
# After= only orders start-up; it does not pull the listed units in by itself.
After=network-online.target decnet-bus.service
|
||||
# Wants= is a soft dependency: the listed units are started with this one, but
# this unit still starts if they fail.
Wants=network-online.target decnet-bus.service
|
||||
# Replaces the individual clusterer / campaign-clusterer / attribution /
|
||||
# reuse-correlator units. Do NOT enable those alongside this one.
|
||||
# Conflicts= is symmetric: starting this unit stops the listed units, and
# starting any of them stops this one.
Conflicts=decnet-clusterer.service decnet-campaign-clusterer.service decnet-attribution.service decnet-reuse-correlator.service
|
||||
|
||||
# [Service]: how the supervisor process is launched, logged, sandboxed and
# restarted. {{ ... }} placeholders are filled in when the template is rendered.
[Service]
|
||||
# simple: the process started by ExecStart= is the main service process.
Type=simple
|
||||
User={{ user }}
|
||||
Group={{ group }}
|
||||
WorkingDirectory={{ install_dir }}
|
||||
# Leading "-" means a missing .env.local is ignored instead of failing the unit.
# NOTE(review): values from EnvironmentFile= override Environment=, so a
# DECNET_SYSTEM_LOGS entry in .env.local would win over the line below while
# stdout/stderr keep going to the hard-coded path — confirm this is intended.
EnvironmentFile=-{{ install_dir }}/.env.local
|
||||
Environment=DECNET_SYSTEM_LOGS=/var/log/decnet/decnet.supervise-cpu.log
|
||||
ExecStart={{ venv_dir }}/bin/decnet supervise cpu
|
||||
# stdout and stderr are both appended to the same log file.
StandardOutput=append:/var/log/decnet/decnet.supervise-cpu.log
|
||||
StandardError=append:/var/log/decnet/decnet.supervise-cpu.log
|
||||
|
||||
# These are read-heavy correlators (DB in, DB out, bus). No docker socket, no
|
||||
# raw sockets — so unlike the batch supervisor this carries NO extra privilege
|
||||
# beyond DB + network. The forkserver pool spawns short-lived compute children
|
||||
# that inherit only this unit's sandbox.
|
||||
|
||||
# Empty assignments: the service runs with an empty capability bounding set and
# no ambient capabilities.
CapabilityBoundingSet=
|
||||
AmbientCapabilities=
|
||||
|
||||
# Security Hardening
|
||||
# The process and its children can never gain privileges via execve().
NoNewPrivileges=yes
|
||||
# full: /usr, the boot loader directories and /etc are mounted read-only.
ProtectSystem=full
|
||||
# /home, /root and /run/user are visible but read-only.
ProtectHome=read-only
|
||||
# Private /tmp and /var/tmp, not shared with the rest of the system.
PrivateTmp=yes
|
||||
ProtectKernelTunables=yes
|
||||
ProtectKernelModules=yes
|
||||
ProtectControlGroups=yes
|
||||
RestrictSUIDSGID=yes
|
||||
LockPersonality=yes
|
||||
# /var/lib/decnet is readable but not writable by this service.
ReadOnlyPaths=/var/lib/decnet
|
||||
# Writable exceptions: the install directory and the log directory.
# NOTE(review): presumably needed because install_dir may live under a path made
# read-only by ProtectSystem=/ProtectHome= above — confirm against deployment.
ReadWritePaths={{ install_dir }} /var/log/decnet
|
||||
|
||||
# Restart only after an unclean exit, waiting 5 s between attempts; on stop,
# allow 20 s for shutdown before systemd terminates the service forcibly.
Restart=on-failure
|
||||
RestartSec=5
|
||||
TimeoutStopSec=20
|
||||
|
||||
# [Install]: enabling this unit attaches it to multi-user.target, so it is
# started as part of a normal multi-user boot.
[Install]
|
||||
WantedBy=multi-user.target
|
||||
Reference in New Issue
Block a user