fix(core): close HIGH ASVS findings V7.1.1 and correctness bugs BUG-1..6

- V7.1.1: /swarm/check no longer returns raw exception text; logs detail
  server-side, returns generic 'probe failed'.
- BUG-1: register EditAction -> SSHDriver so edit ticks no longer crash.
- BUG-2: topology reconcile matches generator-named deckies by
  expected-name membership instead of a hyphen heuristic.
- BUG-3: intel provider lookups acquire the per-provider semaphore so
  declared concurrency bounds are enforced.
- BUG-4: RuleIndex.install evicts a rule from kinds it no longer applies to.
- BUG-5: UnixSocketBus.connect() is lock-guarded with a double-check so
  concurrent first-connects open exactly one socket and reader task.
- BUG-6/V5.1.3: multi-token JSON-field search binds each token to a
  distinct parameter instead of collapsing to the last value.

Regression tests added for every fix, verified red-before/green-after.
V4.1.1c/V12.1.1 (updater master-CN gate) and V12.5.1 (tarball include-list)
confirmed already fixed in prior commits and left untouched.
This commit is contained in:
2026-06-09 23:12:49 -04:00
parent 8d18c59201
commit 6a8af315fb
16 changed files with 737 additions and 24 deletions

View File

@@ -105,14 +105,25 @@ class UnixSocketBus(BaseBus):
# ─── Lifecycle ──────────────────────────────────────────────────────────
async def connect(self) -> None:
# Double-checked locking: the cheap unlocked check fast-paths the
# already-connected case, but the actual connect must hold ``_lock``
# so two coroutines racing on a fresh bus (e.g. concurrent
# publish()/subscribe() both lazily calling connect()) can't each
# open a socket and spawn a reader task — the loser would orphan a
# live FD and an uncancelled reader_loop that close() never reaps.
if self._writer is not None:
return
if self._closed:
raise RuntimeError("connect on closed bus")
self._reader, self._writer = await asyncio.open_unix_connection(str(self._path))
await self._send(protocol.encode(protocol.HELLO, args=self._client_name))
self._reader_task = asyncio.create_task(self._reader_loop())
log.debug("bus.client: connected to %s as %s", self._path, self._client_name)
async with self._lock:
# Re-check under the lock: a racing caller may have connected
# while we awaited the lock.
if self._writer is not None:
return
if self._closed:
raise RuntimeError("connect on closed bus")
self._reader, self._writer = await asyncio.open_unix_connection(str(self._path))
await self._send(protocol.encode(protocol.HELLO, args=self._client_name))
self._reader_task = asyncio.create_task(self._reader_loop())
log.debug("bus.client: connected to %s as %s", self._path, self._client_name)
async def close(self) -> None:
if self._closed:

View File

@@ -1091,6 +1091,18 @@ async def deploy_topology(repo, topology_id: str, *, dry_run: bool = False) -> N
lambda: _compose_ps(compose_path, project=compose_project),
)
bad: list[str] = []
# Build the set of expected decky base names so we can distinguish base
# containers from service containers without relying on a hyphen heuristic.
# The generator names every decky "decky-<NNN>" (which contains a hyphen),
# so `"-" not in service_name` would never match generator-named deckies.
# Instead, explicitly check membership in the known decky name set.
expected_decky_names: set[str] = set()
for _d in hydrated.get("deckies", []):
_cfg = _d.get("decky_config") or {}
_dn = _cfg.get("name") or _d.get("name")
if _dn:
expected_decky_names.add(_dn)
# Build the per-decky state map. The base container's compose
# service name == decky name, which is what we cache on the
# TopologyDecky row. Service containers (named ``<decky>-<svc>``)
@@ -1100,8 +1112,8 @@ async def deploy_topology(repo, topology_id: str, *, dry_run: bool = False) -> N
for row in ps_rows:
state = str(row.get("State", "")).lower()
service_name = str(row.get("Service") or "")
if service_name and "-" not in service_name:
# Plain decky base; cache its docker state.
if service_name and service_name in expected_decky_names:
# This is a decky base container; cache its docker state.
decky_state_by_name[service_name] = state or "unknown"
if state and state != "running":
name = str(row.get("Name") or row.get("Service") or "?")

View File

@@ -104,8 +104,12 @@ async def _enrich_one(
value the providers see and is denormalised onto the row for SIEM /
audit consumers.
"""
async def _guarded_lookup(p: IntelProvider, ip: str) -> IntelResult:
async with p._semaphore:
return await p.lookup(ip)
results: list[IntelResult] = await asyncio.gather(
*(p.lookup(ip) for p in providers),
*(_guarded_lookup(p, ip) for p in providers),
return_exceptions=False, # providers contractually never raise
)

View File

@@ -13,7 +13,7 @@ from decnet.orchestrator.drivers.base import (
ActivityResult,
Driver,
)
from decnet.orchestrator.scheduler import Action, FileAction, TrafficAction
from decnet.orchestrator.scheduler import Action, EditAction, FileAction, TrafficAction
__all__ = [
"ActivityDriver",
@@ -58,7 +58,7 @@ def get_driver_for(action: Action) -> ActivityDriver:
# modules out of every importer's graph.
from decnet.orchestrator.drivers.ssh import SSHDriver
if isinstance(action, (TrafficAction, FileAction)):
if isinstance(action, (TrafficAction, FileAction, EditAction)):
return SSHDriver()
# EmailAction lands in stage 5; reachable only after that import is
# added to scheduler. Importing inside the branch avoids a cycle
@@ -66,7 +66,7 @@ def get_driver_for(action: Action) -> ActivityDriver:
try:
from decnet.orchestrator.emailgen.scheduler import EmailAction
except ImportError: # pragma: no cover - scheduler always exists
EmailAction = None # type: ignore[assignment, misc]
EmailAction = None # type: ignore[misc]
if EmailAction is not None and isinstance(action, EmailAction):
from decnet.orchestrator.drivers.email import EmailDriver
return EmailDriver()

View File

@@ -68,6 +68,15 @@ class RuleIndex:
if not rule.applies_to and not rule.emits:
self.evict(rule.rule_id)
return
# Evict stale kind-buckets for any kinds the updated rule no longer
# claims, so re-install with a narrower applies_to doesn't leave
# ghost entries in the old buckets.
old = self._by_rule.get(rule.rule_id)
if old is not None:
stale_kinds = old.applies_to - rule.applies_to
for kind in stale_kinds:
current = self._by_kind.get(kind, [])
self._by_kind[kind] = [r for r in current if r.rule_id != rule.rule_id]
self._by_rule[rule.rule_id] = rule
for kind in rule.applies_to:
current = self._by_kind.get(kind, [])

View File

@@ -118,10 +118,10 @@ class MySQLRepository(SQLModelRepository):
await lock_conn.execute(text("SELECT RELEASE_LOCK('decnet_schema_init')"))
await lock_conn.close()
def _json_field_equals(self, key: str):
def _json_field_equals(self, key: str, param_name: str = "val"):
# MySQL 5.7+ exposes JSON_EXTRACT; quoted string result returned for
# TEXT-stored JSON, same behavior we rely on in SQLite.
return text(f"JSON_UNQUOTE(JSON_EXTRACT(fields, '$.{key}')) = :val")
return text(f"JSON_UNQUOTE(JSON_EXTRACT(fields, '$.{key}')) = :{param_name}")
async def _insert_tags_or_ignore(self, rows: list[TTPTag]) -> int:
"""Bulk-insert with MySQL's ``INSERT IGNORE`` on the ``uuid`` PK.

View File

@@ -56,9 +56,9 @@ class SQLiteRepository(SQLModelRepository):
"ALTER TABLE attackers ADD COLUMN country_source VARCHAR(16)"
))
def _json_field_equals(self, key: str):
def _json_field_equals(self, key: str, param_name: str = "val"):
# SQLite stores JSON as text; json_extract is the canonical accessor.
return text(f"json_extract(fields, '$.{key}') = :val")
return text(f"json_extract(fields, '$.{key}') = :{param_name}")
async def _insert_tags_or_ignore(self, rows: list[TTPTag]) -> int:
"""Bulk-insert with SQLite's ``ON CONFLICT DO NOTHING`` on the

View File

@@ -84,6 +84,7 @@ class LogsMixin(_MixinBase):
"attacker_ip": Log.attacker_ip,
}
_json_token_idx = 0
for token in tokens:
if ":" in token:
key, val = token.split(":", 1)
@@ -92,9 +93,15 @@ class LogsMixin(_MixinBase):
else:
key_safe = re.sub(r"[^a-zA-Z0-9_]", "", key)
if key_safe:
# Each JSON-field filter needs its own bind-param
# name; sharing `:val` across multiple tokens means
# only the last `.params(val=...)` call survives
# and earlier filters match the wrong value.
param_name = f"jval_{_json_token_idx}"
_json_token_idx += 1
statement = statement.where(
self._json_field_equals(key_safe)
).params(val=val)
self._json_field_equals(key_safe, param_name)
).params(**{param_name: val})
else:
lk = f"%{token}%"
statement = statement.where(
@@ -107,15 +114,17 @@ class LogsMixin(_MixinBase):
)
return statement
def _json_field_equals(self, key: str):
"""Return a text() predicate that matches rows where fields->key == :val.
def _json_field_equals(self, key: str, param_name: str = "val"):
"""Return a text() predicate that matches rows where fields->key == :<param_name>.
Both SQLite and MySQL expose a ``JSON_EXTRACT`` function; MySQL also
exposes the same function under ``json_extract`` (case-insensitive).
The ``:val`` parameter is bound separately and must be supplied with
``.params(val=...)`` by the caller, which keeps us safe from injection.
The bind parameter is supplied with ``.params(<param_name>=...)`` by
the caller. Pass a distinct ``param_name`` for each token so that
multiple JSON-field filters in the same query each bind their own
value instead of sharing the last-written ``:val``.
"""
return text(f"JSON_EXTRACT(fields, '$.{key}') = :val")
return text(f"JSON_EXTRACT(fields, '$.{key}') = :{param_name}")
async def get_logs(
self,

View File

@@ -59,6 +59,9 @@ async def api_check_hosts(
detail=body,
)
except Exception as exc:
# Log the real exception server-side; never surface internal
# exception text (file paths, TLS internals, library guts) to the
# caller. Same fail-closed posture as the global 500 handler.
log.warning("swarm.check unreachable host=%s err=%s", host["name"], exc)
await repo.update_swarm_host(host["uuid"], {"status": "unreachable"})
return SwarmHostHealth(
@@ -66,7 +69,7 @@ async def api_check_hosts(
name=host["name"],
address=host["address"],
reachable=False,
detail=str(exc),
detail="probe failed",
)
results = await asyncio.gather(*(_probe(h) for h in hosts))