feat(orchestrator): MVP synthetic life-injection worker (SSH only)
Adds a new decnet orchestrate worker whose job is to keep the honeypot
ecosystem from looking suspiciously static — a frozen LAN with no
inter-host traffic and no filesystem aging is its own honeypot tell.
MVP scope:
- New OrchestratorEvent table + repo methods (purpose-built sibling
to Log so synthetic events stay separable from attacker-driven ones).
- New orchestrator.{activity,file}.<decky_id> bus topics +
system.orchestrator.health heartbeat.
- SSH-only driver. Traffic action runs python3 inside src container
to TCP-connect dst:22 and read the SSH banner — real on-the-wire
SSH-protocol traffic without shipping creds. File action drops or
refreshes a small file via docker exec on the destination.
- Random scheduler (50/50 traffic/file when >=2 SSH-capable deckies
are running). Diurnal shaping, role-aware pairing, and session-aware
backoff are explicit non-goals for MVP.
- CLI registration, systemd unit (SupplementaryGroups=docker),
worker-registry entry so the dashboard shows orchestrator health.
- 11 tests: scheduler policy, driver argv shape + injection-safety,
end-to-end one-tick integration with FakeBus + SQLite.
This commit is contained in:
0
tests/orchestrator/__init__.py
Normal file
0
tests/orchestrator/__init__.py
Normal file
60
tests/orchestrator/test_scheduler.py
Normal file
60
tests/orchestrator/test_scheduler.py
Normal file
@@ -0,0 +1,60 @@
|
||||
"""Picker policy tests for the orchestrator scheduler."""
|
||||
from __future__ import annotations
|
||||
|
||||
import secrets
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.orchestrator import scheduler
|
||||
|
||||
|
||||
def _decky(uuid: str, name: str, ip: str | None, services: list[str] | str):
|
||||
return {"uuid": uuid, "name": name, "ip": ip, "services": services}
|
||||
|
||||
|
||||
def test_pick_returns_none_when_no_ssh_deckies():
    """``pick`` returns ``None`` when none of the deckies lists ``ssh``."""
    non_ssh = [
        _decky(uuid, name, ip, [service])
        for uuid, name, ip, service in (
            ("u1", "decky-01", "10.0.0.1", "http"),
            ("u2", "decky-02", "10.0.0.2", "smb"),
        )
    ]
    assert scheduler.pick(non_ssh) is None
|
||||
|
||||
|
||||
def test_pick_returns_none_when_ssh_decky_has_no_ip():
    """An SSH decky whose ``ip`` is ``None`` must not produce an action."""
    ipless = _decky("u1", "decky-01", None, ["ssh"])
    assert scheduler.pick([ipless]) is None
|
||||
|
||||
|
||||
def test_pick_file_action_with_single_ssh_decky():
    """With exactly one SSH decky, ``pick`` must return a ``FileAction`` on it.

    An explicit ``SystemRandom`` is passed so the ``rand=`` parameter of
    ``scheduler.pick`` is exercised as well.
    """
    deckies = [_decky("u1", "decky-01", "10.0.0.1", ["ssh"])]
    # SystemRandom.seed() is already a no-op stub in the stdlib, so the
    # instance needs no patching before being handed to the scheduler.
    action = scheduler.pick(deckies, rand=secrets.SystemRandom())

    assert isinstance(action, scheduler.FileAction)
    assert action.dst_uuid == "u1"
    assert action.path.startswith("/")
    assert action.content
|
||||
|
||||
|
||||
def test_pick_traffic_or_file_with_two_ssh_deckies():
    """Two SSH deckies must yield both action kinds across repeated picks.

    Every traffic action is additionally checked: source and destination
    differ, the destination IP is one of the two deckies, and the protocol
    is ``ssh``.
    """
    pair = [
        _decky("u1", "decky-01", "10.0.0.1", ["ssh"]),
        _decky("u2", "decky-02", "10.0.0.2", ["ssh"]),
    ]
    known_ips = {"10.0.0.1", "10.0.0.2"}
    kinds: set[str] = set()

    # The scheduler is expected to split roughly 50/50, so 40 draws make
    # seeing both kinds essentially certain.
    for _ in range(40):
        picked = scheduler.pick(pair)
        assert picked is not None
        if not isinstance(picked, scheduler.TrafficAction):
            kinds.add("file")
            continue
        kinds.add("traffic")
        assert picked.src_uuid != picked.dst_uuid
        assert picked.dst_ip in known_ips
        assert picked.protocol == "ssh"

    assert kinds == {"traffic", "file"}
|
||||
|
||||
|
||||
def test_pick_skips_non_deserialised_services():
|
||||
"""If services is still a JSON string (defensive), the decky is excluded."""
|
||||
deckies = [_decky("u1", "decky-01", "10.0.0.1", '["ssh"]')]
|
||||
assert scheduler.pick(deckies) is None
|
||||
99
tests/orchestrator/test_ssh_driver.py
Normal file
99
tests/orchestrator/test_ssh_driver.py
Normal file
@@ -0,0 +1,99 @@
|
||||
"""Driver tests with the docker subprocess mocked.
|
||||
|
||||
We don't need a real Docker daemon to validate the driver's contract:
|
||||
it boils down to "build an argv, call _run, classify the result". A
|
||||
dependency-injected ``_run`` keeps the tests hermetic.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.orchestrator.drivers import ssh as ssh_driver
|
||||
from decnet.orchestrator.drivers.base import ActivityResult
|
||||
from decnet.orchestrator.scheduler import FileAction, TrafficAction
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_traffic_success_classifies_on_ssh_banner(monkeypatch):
    """A zero exit code plus an SSH banner on stdout counts as success.

    Also pins the argv shape: ``docker exec <src>-ssh ...`` ending with the
    destination IP.
    """
    seen: list[list[str]] = []

    async def fake_run(argv):
        # Record the command instead of spawning docker.
        seen.append(argv)
        return 0, "SSH-2.0-OpenSSH_9.6\r\n", ""

    monkeypatch.setattr(ssh_driver, "_run", fake_run)
    drv = ssh_driver.SSHDriver()
    traffic = TrafficAction(
        src_uuid="u1",
        src_name="decky-01",
        dst_uuid="u2",
        dst_name="decky-02",
        dst_ip="10.0.0.2",
    )

    result = await drv.run(traffic)

    assert isinstance(result, ActivityResult)
    assert result.success is True
    assert result.payload["banner"].startswith("SSH-2.0-OpenSSH")

    docker_argv = seen[0]
    assert docker_argv[:3] == ["docker", "exec", "decky-01-ssh"]
    assert docker_argv[-1] == "10.0.0.2"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_traffic_failure_when_banner_missing(monkeypatch):
    """A non-zero exit with no banner is reported as failure, with rc and stderr kept."""

    async def fake_run(argv):
        # Simulate the probe failing to connect.
        return 1, "", "Connection refused"

    monkeypatch.setattr(ssh_driver, "_run", fake_run)
    drv = ssh_driver.SSHDriver()
    traffic = TrafficAction(
        src_uuid="u1",
        src_name="decky-01",
        dst_uuid="u2",
        dst_name="decky-02",
        dst_ip="10.0.0.2",
    )

    result = await drv.run(traffic)

    assert result.success is False
    payload = result.payload
    assert payload["rc"] == 1
    assert "Connection refused" in payload["stderr"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_file_action_invokes_docker_exec_on_dst(monkeypatch):
    """A file action runs ``docker exec <dst>-ssh sh -c <payload>`` and reports the byte count."""
    seen: list[list[str]] = []

    async def fake_run(argv):
        # Record the command instead of spawning docker.
        seen.append(argv)
        return 0, "", ""

    monkeypatch.setattr(ssh_driver, "_run", fake_run)
    drv = ssh_driver.SSHDriver()
    file_action = FileAction(
        dst_uuid="u2",
        dst_name="decky-02",
        path="/tmp/.cache-1700000000.tmp",
        content="session=1700000000\n",
    )

    result = await drv.run(file_action)

    assert result.success is True
    assert result.payload["bytes"] == len("session=1700000000\n".encode())

    prefix, shell, flag, sh_cmd = seen[0][:3], seen[0][3], seen[0][4], seen[0][5]
    assert prefix == ["docker", "exec", "decky-02-ssh"]
    assert shell == "sh"
    assert flag == "-c"

    # Shell-injection-safety contract: both the path and the content must be
    # shell-quoted, since an unquoted ``;`` or ``$`` would be an injection bug.
    # This path contains only safe characters, which shlex.quote-style
    # quoting leaves bare; the content contains a newline, so it must show
    # up single-quoted.
    assert "/tmp/.cache-1700000000.tmp" in sh_cmd
    assert "'session=1700000000\n'" in sh_cmd
    assert "mkdir -p /tmp" in sh_cmd
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_run_handles_missing_docker_binary(monkeypatch):
    """``_run`` turns a missing executable into rc 127 with a "not found" stderr message."""

    async def fake_create(*args, **kwargs):
        # Behave as if the docker binary is not on PATH.
        raise FileNotFoundError("docker")

    monkeypatch.setattr("asyncio.create_subprocess_exec", fake_create)

    outcome = await ssh_driver._run(["docker", "exec", "x", "true"])
    rc, _stdout, stderr = outcome

    assert rc == 127
    assert "not found" in stderr
|
||||
123
tests/orchestrator/test_worker_integration.py
Normal file
123
tests/orchestrator/test_worker_integration.py
Normal file
@@ -0,0 +1,123 @@
|
||||
"""End-to-end-ish: run one orchestrator tick against a real SQLite repo +
|
||||
FakeBus, with the docker subprocess stubbed. Verifies that:
|
||||
|
||||
* :func:`scheduler.pick` reads the deckies the repo returns,
|
||||
* the driver result is persisted to ``orchestrator_events``,
|
||||
* a bus event is published to the right topic.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
from decnet.bus.fake import FakeBus
|
||||
from decnet.orchestrator import worker as orch_worker
|
||||
from decnet.orchestrator.drivers import ssh as ssh_driver
|
||||
from decnet.web.db.models import TopologyDecky, Topology
|
||||
from decnet.web.db.sqlite.repository import SQLiteRepository
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def repo(tmp_path):
    """Yield an initialised ``SQLiteRepository`` stored under ``tmp_path``.

    The repository's engine is disposed once the test is done with it.
    """
    db_file = tmp_path / "decnet.db"
    repository = SQLiteRepository(db_path=str(db_file))
    await repository.initialize()
    yield repository
    await repository.engine.dispose()
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def fake_bus():
    """Yield a connected ``FakeBus`` and close it afterwards, even on error."""
    in_memory_bus = FakeBus()
    await in_memory_bus.connect()
    try:
        yield in_memory_bus
    finally:
        await in_memory_bus.close()
|
||||
|
||||
|
||||
async def _seed_two_running_ssh_deckies(repo: SQLiteRepository) -> tuple[str, str]:
    """Insert one active topology holding two running SSH deckies.

    Returns the uuids of ``decky-01`` and ``decky-02``, in that order.
    """
    async with repo._session() as db:
        topology = Topology(name="t1", config_snapshot="{}", status="active")
        db.add(topology)
        await db.commit()
        # Refresh before reading ``topology.id`` for the deckies below.
        await db.refresh(topology)

        deckies = [
            TopologyDecky(
                topology_id=topology.id,
                name=name,
                services=json.dumps(["ssh"]),
                ip=ip,
                state="running",
            )
            for name, ip in (("decky-01", "10.0.0.1"), ("decky-02", "10.0.0.2"))
        ]
        for decky in deckies:
            db.add(decky)
        await db.commit()
        for decky in deckies:
            await db.refresh(decky)

        first, second = deckies
        return first.uuid, second.uuid
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_one_tick_records_event_and_publishes(repo, fake_bus, monkeypatch):
    """One tick over two running SSH deckies stores one row and publishes one event.

    The docker subprocess is stubbed so that either action kind succeeds;
    the assertions therefore do not depend on which kind the scheduler picks.
    """
    import asyncio

    await _seed_two_running_ssh_deckies(repo)

    async def fake_run(argv):
        # Only the python3 probe (the traffic action) needs a banner on
        # stdout; any other command just has to exit 0.
        if argv[3] == "python3":
            return 0, "SSH-2.0-OpenSSH_9.6\r\n", ""
        return 0, "", ""

    monkeypatch.setattr(ssh_driver, "_run", fake_run)

    received: list = []

    async def collect():
        # Stop after the first orchestrator event; one tick should emit one.
        async with fake_bus.subscribe("orchestrator.>") as sub:
            async for ev in sub:
                received.append(ev)
                return

    collector = asyncio.create_task(collect())
    try:
        # Yield once so the subscription is registered before we publish.
        await asyncio.sleep(0)

        driver = ssh_driver.SSHDriver()
        await orch_worker._one_tick(repo, driver, fake_bus)

        await asyncio.wait_for(collector, timeout=2.0)
    finally:
        # If the tick raised, the collector is still waiting on the bus.
        # Cancel and reap it so no pending task (or open subscription)
        # outlives the test.
        if not collector.done():
            collector.cancel()
            await asyncio.gather(collector, return_exceptions=True)

    rows = await repo.list_orchestrator_events(limit=10)
    assert len(rows) == 1
    row = rows[0]
    assert row["success"] is True
    assert row["protocol"] == "ssh"
    assert row["kind"] in {"traffic", "file"}

    assert len(received) == 1
    ev = received[0]
    assert ev.topic.startswith("orchestrator.")
    assert ev.payload["success"] is True
    assert ev.payload["kind"] == row["kind"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_tick_is_noop_when_no_running_deckies(repo, fake_bus, monkeypatch):
    """With nothing seeded, a tick neither runs the stubbed subprocess nor stores an event."""
    invocations: list = []

    async def fake_run(argv):
        # Any call at all fails the test below.
        invocations.append(argv)
        return 0, "SSH-2.0-foo", ""

    monkeypatch.setattr(ssh_driver, "_run", fake_run)
    drv = ssh_driver.SSHDriver()

    await orch_worker._one_tick(repo, drv, fake_bus)

    assert not invocations
    events = await repo.list_orchestrator_events(limit=10)
    assert events == []
|
||||
Reference in New Issue
Block a user