fix(core): close HIGH ASVS findings V7.1.1 and correctness bugs BUG-1..6
- V7.1.1: /swarm/check no longer returns raw exception text; logs detail server-side, returns generic 'probe failed'. - BUG-1: register EditAction -> SSHDriver so edit ticks no longer crash. - BUG-2: topology reconcile matches generator-named deckies by expected-name membership instead of a hyphen heuristic. - BUG-3: intel provider lookups acquire the per-provider semaphore so declared concurrency bounds are enforced. - BUG-4: RuleIndex.install evicts a rule from kinds it no longer applies to. - BUG-5: UnixSocketBus.connect() is lock-guarded with a double-check so concurrent first-connects open exactly one socket and reader task. - BUG-6/V5.1.3: multi-token JSON-field search binds each token to a distinct parameter instead of collapsing to the last value. Regression tests added for every fix, verified red-before/green-after. V4.1.1c/V12.1.1 (updater master-CN gate) and V12.5.1 (tarball include-list) confirmed already fixed in prior commits and left untouched.
This commit is contained in:
@@ -7,6 +7,7 @@ the tmp filesystem — no Docker, no external broker.
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
import pathlib
|
||||
import stat
|
||||
|
||||
@@ -130,3 +131,52 @@ class TestEndToEnd:
|
||||
server = BusServer(sock, group=None)
|
||||
with pytest.raises(FileNotFoundError):
|
||||
await server.start()
|
||||
|
||||
|
||||
class TestConcurrentConnect:
|
||||
"""BUG-5: connect() must hold the bus lock so concurrent first-connects
|
||||
can't each open a socket and spawn a reader (orphaning the loser's FD +
|
||||
reader_loop task)."""
|
||||
|
||||
async def test_concurrent_connect_opens_one_socket_and_reader(
|
||||
self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
|
||||
) -> None:
|
||||
sock = tmp_path / "bus.sock"
|
||||
server = BusServer(sock, group=None)
|
||||
await server.start()
|
||||
serve_task = asyncio.create_task(server.serve_forever())
|
||||
|
||||
bus = UnixSocketBus(sock, client_name="race-client")
|
||||
|
||||
# Wrap the real transport opener with a counter, and yield control
|
||||
# mid-open so two racing connect() calls actually interleave. Without
|
||||
# the lock both would pass the `self._writer is None` guard and each
|
||||
# open a socket; the lock + re-check collapse that to one open.
|
||||
real_open = asyncio.open_unix_connection
|
||||
calls = 0
|
||||
|
||||
async def _counting_open(*args, **kwargs):
|
||||
nonlocal calls
|
||||
calls += 1
|
||||
await asyncio.sleep(0) # force the scheduler to interleave callers
|
||||
return await real_open(*args, **kwargs)
|
||||
|
||||
monkeypatch.setattr(asyncio, "open_unix_connection", _counting_open)
|
||||
|
||||
try:
|
||||
await asyncio.gather(bus.connect(), bus.connect(), bus.connect())
|
||||
|
||||
# Exactly one socket opened and exactly one live reader task.
|
||||
assert calls == 1
|
||||
assert bus._writer is not None
|
||||
assert bus._reader_task is not None
|
||||
assert not bus._reader_task.done()
|
||||
finally:
|
||||
await bus.close()
|
||||
serve_task.cancel()
|
||||
with contextlib.suppress(asyncio.CancelledError):
|
||||
await serve_task
|
||||
await server.close()
|
||||
|
||||
# close() reaped the single reader task — no orphans left behind.
|
||||
assert bus._reader_task is None
|
||||
|
||||
113
tests/db/test_log_multi_token_search.py
Normal file
113
tests/db/test_log_multi_token_search.py
Normal file
@@ -0,0 +1,113 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Regression test for BUG-6: multi-token JSON-field search collapses all
|
||||
custom filters to the last value (shared :val bind parameter).
|
||||
|
||||
Root cause: ``_apply_filters`` loops over search tokens and for each
|
||||
JSON-field token calls ``.params(val=val)``. Because every call reuses the
|
||||
same bind name ``:val``, SQLAlchemy's last ``.params()`` call overwrites all
|
||||
earlier ones — only the last JSON-field token's value is actually bound.
|
||||
|
||||
The fix gives each JSON-field token a distinct bind parameter name
|
||||
(``jval_0``, ``jval_1``, …) so every token value survives.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.web.db.factory import get_repository
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
async def repo(tmp_path: Path):
|
||||
r = get_repository(db_path=str(tmp_path / "log_search.db"))
|
||||
await r.initialize()
|
||||
return r
|
||||
|
||||
|
||||
async def _add_log(repo, **kwargs) -> None:
|
||||
base = {
|
||||
"raw_line": "test",
|
||||
"decky": "decky-01",
|
||||
"service": "ssh",
|
||||
"event_type": "cmd",
|
||||
"attacker_ip": "10.0.0.1",
|
||||
"timestamp": "2025-01-01T00:00:00",
|
||||
"fields": {},
|
||||
}
|
||||
base.update(kwargs)
|
||||
await repo.add_log(base)
|
||||
|
||||
|
||||
# ── BUG-6 regression ─────────────────────────────────────────────────────────
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_single_json_field_token_matches(repo) -> None:
|
||||
"""Baseline: a single JSON-field token filters correctly."""
|
||||
await _add_log(repo, fields={"cmd": "ls", "user": "root"})
|
||||
await _add_log(repo, fields={"cmd": "rm", "user": "bob"})
|
||||
|
||||
logs = await repo.get_logs(search='cmd:ls')
|
||||
assert len(logs) == 1
|
||||
assert json.loads(logs[0]["fields"])["cmd"] == "ls"
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_two_distinct_json_field_tokens_both_applied(repo) -> None:
|
||||
"""BUG-6 regression: two JSON-field tokens must BOTH filter.
|
||||
|
||||
Before the fix, only the last token's value was bound. A search for
|
||||
``cmd:ls user:root`` would execute with ``val='root'`` for both
|
||||
predicates — rows with ``cmd='ls'`` but ``user='bob'`` would appear
|
||||
in the results instead of being filtered out.
|
||||
"""
|
||||
await _add_log(repo, fields={"cmd": "ls", "user": "root"}) # should match
|
||||
await _add_log(repo, fields={"cmd": "ls", "user": "bob"}) # cmd matches, user doesn't
|
||||
await _add_log(repo, fields={"cmd": "rm", "user": "root"}) # user matches, cmd doesn't
|
||||
await _add_log(repo, fields={"cmd": "rm", "user": "bob"}) # neither matches
|
||||
|
||||
logs = await repo.get_logs(search='cmd:ls user:root')
|
||||
# Only the first row satisfies both predicates.
|
||||
assert len(logs) == 1, (
|
||||
f"Expected 1 log matching cmd:ls AND user:root, got {len(logs)}. "
|
||||
"BUG-6: shared :val bind param causes last token to overwrite earlier ones."
|
||||
)
|
||||
fields = json.loads(logs[0]["fields"])
|
||||
assert fields["cmd"] == "ls"
|
||||
assert fields["user"] == "root"
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_three_json_field_tokens_all_applied(repo) -> None:
|
||||
"""Three JSON-field tokens must all filter independently."""
|
||||
await _add_log(repo, fields={"a": "1", "b": "2", "c": "3"}) # full match
|
||||
await _add_log(repo, fields={"a": "1", "b": "2", "c": "X"}) # c mismatch
|
||||
await _add_log(repo, fields={"a": "1", "b": "X", "c": "3"}) # b mismatch
|
||||
await _add_log(repo, fields={"a": "X", "b": "2", "c": "3"}) # a mismatch
|
||||
|
||||
logs = await repo.get_logs(search='a:1 b:2 c:3')
|
||||
assert len(logs) == 1
|
||||
fields = json.loads(logs[0]["fields"])
|
||||
assert fields == {"a": "1", "b": "2", "c": "3"}
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_json_field_token_mixed_with_core_field_token(repo) -> None:
|
||||
"""A JSON-field token combined with a core-field filter both apply."""
|
||||
await _add_log(
|
||||
repo,
|
||||
decky="decky-01",
|
||||
fields={"cmd": "whoami"},
|
||||
)
|
||||
await _add_log(
|
||||
repo,
|
||||
decky="decky-02",
|
||||
fields={"cmd": "whoami"},
|
||||
)
|
||||
|
||||
# Only decky-01 row should match.
|
||||
logs = await repo.get_logs(search='decky:decky-01 cmd:whoami')
|
||||
assert len(logs) == 1
|
||||
assert logs[0]["decky"] == "decky-01"
|
||||
232
tests/deploy/test_reconcile_generator_names.py
Normal file
232
tests/deploy/test_reconcile_generator_names.py
Normal file
@@ -0,0 +1,232 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""BUG-2 regression: post-deploy reconcile must NOT mark generator-named
|
||||
deckies (``decky-NNN``) as ``failed`` when their containers are running.
|
||||
|
||||
Root cause: the OLD heuristic ``"-" not in service_name`` never fires for
|
||||
generator-named deckies because those names always contain a hyphen. The fix
|
||||
replaces the heuristic with explicit set-membership against
|
||||
``expected_decky_names`` built from ``hydrated['deckies']``.
|
||||
|
||||
These tests exercise the REAL production code path:
|
||||
``decnet.engine.deployer.deploy_topology``. They mock every external I/O
|
||||
boundary (Docker, compose, repo, filesystem) at the same layer used by the
|
||||
rest of the deploy test-suite, so the assertions flow through the actual
|
||||
``expected_decky_names`` / ``decky_state_by_name`` logic in deployer.py.
|
||||
A revert of the BUG-2 fix causes both primary tests to FAIL (red-before /
|
||||
green-after verified manually — see docstring on each test).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ── helpers ──────────────────────────────────────────────────────────────────
|
||||
|
||||
def _make_decky(name: str, *, uuid_val: str | None = None) -> dict[str, Any]:
|
||||
return {
|
||||
"uuid": uuid_val or str(uuid.uuid4()),
|
||||
"name": name,
|
||||
"decky_config": {"name": name},
|
||||
}
|
||||
|
||||
|
||||
def _ps_rows(decky_name: str, *service_suffixes: str, state: str = "running") -> list[dict]:
|
||||
"""Simulate ``docker compose ps`` JSON rows for one decky + its services."""
|
||||
rows: list[dict] = [
|
||||
{"Service": decky_name, "Name": decky_name, "State": state, "ExitCode": 0},
|
||||
]
|
||||
for svc in service_suffixes:
|
||||
container = f"{decky_name}-{svc}"
|
||||
rows.append({"Service": container, "Name": container, "State": state, "ExitCode": 0})
|
||||
return rows
|
||||
|
||||
|
||||
def _build_hydrated(deckies: list[dict[str, Any]]) -> dict[str, Any]:
|
||||
"""Minimal hydrated topology dict that satisfies deploy_topology's lookups."""
|
||||
return {
|
||||
"topology": {
|
||||
"uuid": "topo-test-1234",
|
||||
# No target_host_uuid → master-local deploy path
|
||||
},
|
||||
"lans": [
|
||||
{
|
||||
"name": "DMZ",
|
||||
"subnet": "10.99.0.0/24",
|
||||
"is_dmz": True,
|
||||
}
|
||||
],
|
||||
"deckies": deckies,
|
||||
}
|
||||
|
||||
|
||||
async def _run_deploy(hydrated: dict, ps_rows: list[dict]) -> dict[str, str]:
|
||||
"""Drive deploy_topology with full I/O mocks; return the state values
|
||||
passed to ``repo.update_topology_decky`` keyed by decky UUID."""
|
||||
from decnet.engine import deployer as _dep
|
||||
|
||||
topology_id = hydrated["topology"]["uuid"]
|
||||
recorded: dict[str, str] = {}
|
||||
|
||||
repo = MagicMock()
|
||||
repo.update_topology_decky = AsyncMock(side_effect=lambda uid, patch: recorded.__setitem__(uid, patch["state"]))
|
||||
|
||||
# Map uuid → name so we can translate the assertion later
|
||||
uuid_to_name = {d["uuid"]: d["name"] for d in hydrated["deckies"]}
|
||||
|
||||
with (
|
||||
patch.object(_dep, "hydrate", new=AsyncMock(return_value=hydrated)),
|
||||
patch.object(_dep, "_validate_topology", return_value={}),
|
||||
patch.object(_dep, "_validation_errors", return_value=False),
|
||||
patch.object(_dep, "check_no_host_port_collision", return_value=[]),
|
||||
patch.object(_dep, "_warn_if_userland_proxy_enabled"),
|
||||
patch.object(_dep, "transition_status", new=AsyncMock()),
|
||||
# _topology_compose_path must return a Path; compose_path.exists()
|
||||
# is checked in the rollback guard — return a path that does NOT exist
|
||||
# so the rollback branch is skipped.
|
||||
patch.object(_dep, "_topology_compose_path", return_value=Path("/nonexistent/compose.yml")),
|
||||
patch.object(_dep, "_topology_compose_project", return_value="test-project"),
|
||||
patch.object(_dep, "create_bridge_network"),
|
||||
patch.object(_dep, "write_topology_compose"),
|
||||
# _compose_with_retry is called inside anyio.to_thread.run_sync(lambda: ...)
|
||||
# We patch it so the lambda is a no-op.
|
||||
patch.object(_dep, "_compose_with_retry"),
|
||||
# _compose_ps is also called inside anyio.to_thread.run_sync; patch it
|
||||
# to return our controlled rows.
|
||||
patch.object(_dep, "_compose_ps", return_value=ps_rows),
|
||||
# docker.from_env() is called at deploy time
|
||||
patch("decnet.engine.deployer.docker") as mock_docker,
|
||||
# Silence the canary planter import that runs at the end
|
||||
patch.dict("sys.modules", {"decnet.canary": MagicMock(), "decnet.canary.planter": MagicMock()}),
|
||||
):
|
||||
mock_docker.from_env.return_value = MagicMock()
|
||||
await _dep.deploy_topology(repo, topology_id)
|
||||
|
||||
# Translate uuid keys → decky names for readable assertions
|
||||
return {uuid_to_name[uid]: state for uid, state in recorded.items()}
|
||||
|
||||
|
||||
# ── BUG-2 primary regression tests ───────────────────────────────────────────
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_generator_named_decky_reconciles_running() -> None:
|
||||
"""BUG-2 primary: generator-named decky whose container is RUNNING must be
|
||||
reconciled to state='running', NOT 'failed'.
|
||||
|
||||
RED before fix: the old ``"-" not in service_name`` heuristic never cached
|
||||
"decky-001" (contains a hyphen), so ``decky_state_by_name.get("decky-001")``
|
||||
returned ``"unknown"`` and new_state was forced to ``"failed"``.
|
||||
GREEN after fix: membership check against expected_decky_names finds
|
||||
"decky-001" and correctly stores state="running".
|
||||
"""
|
||||
decky = _make_decky("decky-001")
|
||||
hydrated = _build_hydrated([decky])
|
||||
ps = _ps_rows("decky-001", "ssh", "http") # base + two service containers
|
||||
|
||||
result = await _run_deploy(hydrated, ps)
|
||||
|
||||
assert result["decky-001"] == "running", (
|
||||
"Generator-named decky with running container must reconcile to 'running'"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_absent_decky_reconciles_failed() -> None:
|
||||
"""Genuinely absent / stopped decky must reconcile to state='failed'.
|
||||
|
||||
This covers the other branch: if no ps row matches the decky name
|
||||
(container never started or exited), new_state must be 'failed'.
|
||||
GREEN in both old and new code — ensures the 'failed' path is not broken
|
||||
by the BUG-2 fix.
|
||||
"""
|
||||
decky = _make_decky("decky-002")
|
||||
hydrated = _build_hydrated([decky])
|
||||
# ps rows contain nothing for decky-002 — simulates a decky that never started
|
||||
ps: list[dict] = []
|
||||
|
||||
result = await _run_deploy(hydrated, ps)
|
||||
|
||||
assert result["decky-002"] == "failed", (
|
||||
"Decky with no running container must reconcile to 'failed'"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_both_branches_in_one_topology() -> None:
|
||||
"""Running generator-named decky → 'running'; absent decky → 'failed'.
|
||||
|
||||
Exercises both branches of the reconcile loop simultaneously, which
|
||||
is the most direct regression guard: if the fix is reverted, decky-001
|
||||
flips to 'failed' while decky-002 stays 'failed', making the first
|
||||
assertion fail.
|
||||
"""
|
||||
decky_running = _make_decky("decky-001")
|
||||
decky_absent = _make_decky("decky-099")
|
||||
hydrated = _build_hydrated([decky_running, decky_absent])
|
||||
|
||||
# Only decky-001 has running containers; decky-099 has none
|
||||
ps = _ps_rows("decky-001", "ssh")
|
||||
|
||||
result = await _run_deploy(hydrated, ps)
|
||||
|
||||
assert result["decky-001"] == "running", (
|
||||
"Running generator-named decky must not be marked failed"
|
||||
)
|
||||
assert result["decky-099"] == "failed", (
|
||||
"Absent decky must be marked failed"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_decky_config_nested_name_is_honoured() -> None:
|
||||
"""When decky_config.name differs from outer name, the config name is
|
||||
used for both compose service lookup and repo update — same logic as
|
||||
deployer.py lines 1101-1104 and 1136-1138."""
|
||||
outer_name = "old-outer-name"
|
||||
config_name = "decky-007"
|
||||
uid = str(uuid.uuid4())
|
||||
decky = {
|
||||
"uuid": uid,
|
||||
"name": outer_name,
|
||||
"decky_config": {"name": config_name},
|
||||
}
|
||||
hydrated = _build_hydrated([decky])
|
||||
ps = _ps_rows(config_name, "ssh")
|
||||
|
||||
from decnet.engine import deployer as _dep
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
recorded: dict[str, str] = {}
|
||||
repo = MagicMock()
|
||||
repo.update_topology_decky = AsyncMock(
|
||||
side_effect=lambda u, p: recorded.__setitem__(u, p["state"])
|
||||
)
|
||||
|
||||
topology_id = hydrated["topology"]["uuid"]
|
||||
|
||||
with (
|
||||
patch.object(_dep, "hydrate", new=AsyncMock(return_value=hydrated)),
|
||||
patch.object(_dep, "_validate_topology", return_value={}),
|
||||
patch.object(_dep, "_validation_errors", return_value=False),
|
||||
patch.object(_dep, "check_no_host_port_collision", return_value=[]),
|
||||
patch.object(_dep, "_warn_if_userland_proxy_enabled"),
|
||||
patch.object(_dep, "transition_status", new=AsyncMock()),
|
||||
patch.object(_dep, "_topology_compose_path", return_value=Path("/nonexistent/compose.yml")),
|
||||
patch.object(_dep, "_topology_compose_project", return_value="test-project"),
|
||||
patch.object(_dep, "create_bridge_network"),
|
||||
patch.object(_dep, "write_topology_compose"),
|
||||
patch.object(_dep, "_compose_with_retry"),
|
||||
patch.object(_dep, "_compose_ps", return_value=ps),
|
||||
patch("decnet.engine.deployer.docker") as mock_docker,
|
||||
patch.dict("sys.modules", {"decnet.canary": MagicMock(), "decnet.canary.planter": MagicMock()}),
|
||||
):
|
||||
mock_docker.from_env.return_value = MagicMock()
|
||||
await _dep.deploy_topology(repo, topology_id)
|
||||
|
||||
assert recorded.get(uid) == "running", (
|
||||
"decky_config.name must be used for ps lookup; decky should reconcile running"
|
||||
)
|
||||
107
tests/intel/test_semaphore_enforcement.py
Normal file
107
tests/intel/test_semaphore_enforcement.py
Normal file
@@ -0,0 +1,107 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Regression test for BUG-3: provider semaphore was declared but never acquired.
|
||||
|
||||
The ``IntelProvider`` ABC creates ``self._semaphore = asyncio.Semaphore(self.concurrency)``
|
||||
in ``__init__``, but ``_enrich_one`` called ``p.lookup(ip)`` directly without
|
||||
acquiring the semaphore first — concurrency and rate limits were silently
|
||||
unenforced.
|
||||
|
||||
The fix wraps each ``p.lookup(ip)`` call with ``async with p._semaphore``.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.intel.base import IntelProvider, IntelResult
|
||||
from decnet.intel.worker import _enrich_one
|
||||
|
||||
|
||||
class _OrderedProvider(IntelProvider):
|
||||
"""Test double that records order of entry/exit and enforces capacity == 1."""
|
||||
|
||||
concurrency = 1 # semaphore with N=1 forces serialization
|
||||
min_dispatch_interval_s = 0.0
|
||||
|
||||
def __init__(self, name: str, delay: float = 0.05) -> None:
|
||||
super().__init__()
|
||||
self.name = name
|
||||
self._delay = delay
|
||||
self.concurrent_high_watermark = 0
|
||||
self._in_flight = 0
|
||||
self.calls: list[str] = []
|
||||
|
||||
async def lookup(self, ip: str) -> IntelResult:
|
||||
self._in_flight += 1
|
||||
self.concurrent_high_watermark = max(
|
||||
self.concurrent_high_watermark, self._in_flight
|
||||
)
|
||||
self.calls.append(ip)
|
||||
await asyncio.sleep(self._delay)
|
||||
self._in_flight -= 1
|
||||
return IntelResult(
|
||||
provider=self.name,
|
||||
verdict="benign",
|
||||
column_updates={},
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_semaphore_serializes_concurrent_callers() -> None:
|
||||
"""BUG-3 regression: N=1 semaphore must prevent >1 concurrent lookup.
|
||||
|
||||
We fire two _enrich_one calls concurrently against the same provider
|
||||
(concurrency=1). With the semaphore enforced, the provider's
|
||||
concurrent_high_watermark stays at 1. Without it, both calls would
|
||||
enter lookup simultaneously and the watermark would reach 2.
|
||||
"""
|
||||
provider = _OrderedProvider("test_provider", delay=0.05)
|
||||
|
||||
results = await asyncio.gather(
|
||||
_enrich_one("uuid-a", "1.1.1.1", [provider], ttl_hours=24),
|
||||
_enrich_one("uuid-b", "2.2.2.2", [provider], ttl_hours=24),
|
||||
)
|
||||
|
||||
assert len(results) == 2
|
||||
# Both callers should complete successfully.
|
||||
assert results[0]["attacker_uuid"] == "uuid-a"
|
||||
assert results[1]["attacker_uuid"] == "uuid-b"
|
||||
# The semaphore serialized access — never more than 1 in-flight at once.
|
||||
assert provider.concurrent_high_watermark == 1, (
|
||||
f"Semaphore not enforced: {provider.concurrent_high_watermark} concurrent "
|
||||
"calls observed, expected at most 1"
|
||||
)
|
||||
# Both IPs were looked up.
|
||||
assert set(provider.calls) == {"1.1.1.1", "2.2.2.2"}
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_semaphore_with_higher_concurrency_allows_parallel() -> None:
|
||||
"""With concurrency=2, two callers may proceed simultaneously."""
|
||||
|
||||
class _Wide(IntelProvider):
|
||||
concurrency = 2
|
||||
min_dispatch_interval_s = 0.0
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self.name = "wide"
|
||||
self._in_flight = 0
|
||||
self.watermark = 0
|
||||
|
||||
async def lookup(self, ip: str) -> IntelResult:
|
||||
self._in_flight += 1
|
||||
self.watermark = max(self.watermark, self._in_flight)
|
||||
await asyncio.sleep(0.05)
|
||||
self._in_flight -= 1
|
||||
return IntelResult(provider=self.name, verdict=None, column_updates={})
|
||||
|
||||
wide = _Wide()
|
||||
await asyncio.gather(
|
||||
_enrich_one("uuid-a", "1.1.1.1", [wide], ttl_hours=24),
|
||||
_enrich_one("uuid-b", "2.2.2.2", [wide], ttl_hours=24),
|
||||
)
|
||||
# With concurrency=2 both calls are allowed in simultaneously.
|
||||
assert wide.watermark == 2
|
||||
78
tests/orchestrator/test_driver_registry.py
Normal file
78
tests/orchestrator/test_driver_registry.py
Normal file
@@ -0,0 +1,78 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Regression tests for :func:`decnet.orchestrator.drivers.get_driver_for`.
|
||||
|
||||
BUG-1: EditAction had no registered driver — get_driver_for raised TypeError
|
||||
instead of returning an SSHDriver, silently crashing every edit tick.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.orchestrator.drivers import get_driver_for
|
||||
from decnet.orchestrator.drivers.ssh import SSHDriver
|
||||
from decnet.orchestrator.scheduler import EditAction, FileAction, TrafficAction
|
||||
|
||||
|
||||
def _traffic_action() -> TrafficAction:
|
||||
return TrafficAction(
|
||||
src_uuid="u1", src_name="decky-01",
|
||||
dst_uuid="u2", dst_name="decky-02",
|
||||
dst_ip="10.0.0.2",
|
||||
)
|
||||
|
||||
|
||||
def _file_action() -> FileAction:
|
||||
return FileAction(
|
||||
dst_uuid="u1", dst_name="decky-01",
|
||||
path="/tmp/test.txt",
|
||||
content="hello",
|
||||
)
|
||||
|
||||
|
||||
def _edit_action() -> EditAction:
|
||||
return EditAction(
|
||||
dst_uuid="u1", dst_name="decky-01",
|
||||
path="/tmp/notes.txt",
|
||||
persona="alice",
|
||||
content_class="note",
|
||||
previous_body="old content",
|
||||
synthetic_file_uuid="sf-uuid-001",
|
||||
)
|
||||
|
||||
|
||||
def test_traffic_action_resolves_to_ssh_driver() -> None:
|
||||
drv = get_driver_for(_traffic_action())
|
||||
assert isinstance(drv, SSHDriver)
|
||||
|
||||
|
||||
def test_file_action_resolves_to_ssh_driver() -> None:
|
||||
drv = get_driver_for(_file_action())
|
||||
assert isinstance(drv, SSHDriver)
|
||||
|
||||
|
||||
def test_edit_action_resolves_to_ssh_driver() -> None:
|
||||
"""BUG-1 regression: EditAction must resolve to SSHDriver, not TypeError."""
|
||||
# Before the fix this raised:
|
||||
# TypeError: no driver registered for action type EditAction
|
||||
drv = get_driver_for(_edit_action())
|
||||
assert isinstance(drv, SSHDriver)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_edit_action_driver_executes_without_crash(monkeypatch) -> None:
|
||||
"""BUG-1 regression: SSHDriver.run(EditAction) must not crash silently.
|
||||
|
||||
We mock _run to avoid needing a live Docker daemon; the important
|
||||
assertion is that the driver resolves, runs, and returns an ActivityResult.
|
||||
"""
|
||||
from decnet.orchestrator.drivers import ssh as ssh_driver
|
||||
from decnet.orchestrator.drivers.base import ActivityResult
|
||||
|
||||
async def fake_run(argv):
|
||||
return 0, "", ""
|
||||
|
||||
monkeypatch.setattr(ssh_driver, "_run", fake_run)
|
||||
|
||||
drv = get_driver_for(_edit_action())
|
||||
result = await drv.run(_edit_action())
|
||||
assert isinstance(result, ActivityResult)
|
||||
@@ -437,6 +437,43 @@ def test_check_marks_hosts_active(client: TestClient, stub_agent) -> None:
|
||||
assert one["last_heartbeat"] is not None
|
||||
|
||||
|
||||
# V7.1.1: a probe failure must mark the host unreachable WITHOUT leaking the
|
||||
# raw exception text (file paths, TLS internals) back to the caller.
|
||||
_LEAKY_PROBE_SECRET = "/etc/decnet/tls/worker-7.key: permission denied [TLSV1_ALERT]"
|
||||
|
||||
|
||||
class _LeakyAgentClient(_StubAgentClient):
|
||||
async def __aenter__(self) -> "_LeakyAgentClient":
|
||||
raise RuntimeError(_LEAKY_PROBE_SECRET)
|
||||
|
||||
|
||||
def test_check_unreachable_does_not_leak_exception_text(
|
||||
client: TestClient, monkeypatch: pytest.MonkeyPatch
|
||||
) -> None:
|
||||
from decnet.web.router.swarm import api_check_hosts as check_mod
|
||||
|
||||
monkeypatch.setattr(check_mod, "AgentClient", _LeakyAgentClient)
|
||||
|
||||
h = client.post(
|
||||
"/swarm/enroll",
|
||||
json={"name": "leaky-w", "address": "10.0.0.13", "agent_port": 8765},
|
||||
).json()
|
||||
|
||||
resp = client.post("/swarm/check")
|
||||
assert resp.status_code == 200
|
||||
results = resp.json()["results"]
|
||||
assert len(results) == 1
|
||||
assert results[0]["reachable"] is False
|
||||
# Generic message only — the internal exception string must be absent.
|
||||
assert results[0]["detail"] == "probe failed"
|
||||
assert _LEAKY_PROBE_SECRET not in resp.text
|
||||
assert "permission denied" not in resp.text
|
||||
|
||||
# The host is still correctly marked unreachable server-side.
|
||||
one = client.get(f"/swarm/hosts/{h['host_uuid']}").json()
|
||||
assert one["status"] == "unreachable"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- /deckies
|
||||
|
||||
|
||||
|
||||
@@ -278,6 +278,54 @@ def test_apply_change_continues_on_error(caplog: pytest.LogCaptureFixture) -> No
|
||||
assert idx.get("R_BAD") is None
|
||||
|
||||
|
||||
def test_install_evicts_stale_kinds_on_reinstall() -> None:
|
||||
"""BUG-4 regression: re-installing a rule with a narrower applies_to must
|
||||
remove the rule from kinds it no longer claims.
|
||||
|
||||
Before the fix, install() only added the rule to new kind-buckets;
|
||||
it never cleaned up the old buckets. A rule installed for {A, B} then
|
||||
re-installed for {A} would remain in B's bucket indefinitely.
|
||||
"""
|
||||
idx = RuleIndex()
|
||||
# First install: rule covers both "command" and "email" kinds.
|
||||
r1 = _rule("R_AB", applies_to=frozenset({"command", "email"}))
|
||||
idx.install(r1)
|
||||
assert idx.get("R_AB") is r1
|
||||
assert any(r.rule_id == "R_AB" for r in idx.by_kind("command"))
|
||||
assert any(r.rule_id == "R_AB" for r in idx.by_kind("email"))
|
||||
|
||||
# Re-install with a narrower set: only "command" now.
|
||||
r2 = _rule("R_AB", rule_version=2, applies_to=frozenset({"command"}))
|
||||
idx.install(r2)
|
||||
|
||||
assert idx.get("R_AB") is r2
|
||||
# Rule must still be in "command".
|
||||
assert any(r.rule_id == "R_AB" for r in idx.by_kind("command"))
|
||||
# BUG-4: rule must NO LONGER be in "email" — the stale entry must be evicted.
|
||||
assert not any(r.rule_id == "R_AB" for r in idx.by_kind("email")), (
|
||||
"Stale kind bucket 'email' still contains R_AB after re-install with "
|
||||
"applies_to={command}; eviction on reinstall is broken"
|
||||
)
|
||||
|
||||
|
||||
def test_install_eviction_does_not_affect_other_rules_in_same_kind() -> None:
|
||||
"""Evicting stale kinds for one rule must leave other rules in those kinds intact."""
|
||||
idx = RuleIndex()
|
||||
r_ab = _rule("R_AB", applies_to=frozenset({"command", "email"}))
|
||||
r_email = _rule("R_EMAIL_ONLY", applies_to=frozenset({"email"}))
|
||||
idx.install(r_ab)
|
||||
idx.install(r_email)
|
||||
|
||||
# Re-install R_AB without "email".
|
||||
r_ab_v2 = _rule("R_AB", rule_version=2, applies_to=frozenset({"command"}))
|
||||
idx.install(r_ab_v2)
|
||||
|
||||
# R_EMAIL_ONLY must still be in "email".
|
||||
assert any(r.rule_id == "R_EMAIL_ONLY" for r in idx.by_kind("email"))
|
||||
# R_AB must be gone from "email".
|
||||
assert not any(r.rule_id == "R_AB" for r in idx.by_kind("email"))
|
||||
|
||||
|
||||
def test_expired_state_treated_as_disabled_by_is_active() -> None:
|
||||
"""Sanity check on the helper used by both engine and lifters."""
|
||||
from decnet.ttp.impl._state import is_active
|
||||
|
||||
Reference in New Issue
Block a user