feat(orchestrator): MVP synthetic life-injection worker (SSH only)

Adds a new decnet orchestrate worker whose job is to keep the honeypot
ecosystem from looking suspiciously static — a frozen LAN with no
inter-host traffic and no filesystem aging is its own honeypot tell.

MVP scope:
- New OrchestratorEvent table + repo methods (purpose-built sibling
  to Log so synthetic events stay separable from attacker-driven ones).
- New orchestrator.{activity,file}.<decky_id> bus topics +
  system.orchestrator.health heartbeat.
- SSH-only driver. Traffic action runs python3 inside src container
  to TCP-connect dst:22 and read the SSH banner — real on-the-wire
  SSH-protocol traffic without shipping creds. File action drops or
  refreshes a small file via docker exec on the destination.
- Random scheduler (50/50 traffic/file when >=2 SSH-capable deckies
  are running). Diurnal shaping, role-aware pairing, and session-aware
  backoff are explicit non-goals for MVP.
- CLI registration, systemd unit (SupplementaryGroups=docker),
  worker-registry entry so the dashboard shows orchestrator health.
- 11 tests: scheduler policy, driver argv shape + injection-safety,
  end-to-end one-tick integration with FakeBus + SQLite.
This commit is contained in:
2026-04-26 19:43:20 -04:00
parent cc2deb73f7
commit 4c37ece39e
21 changed files with 972 additions and 1 deletions

View File

View File

@@ -0,0 +1,60 @@
"""Picker policy tests for the orchestrator scheduler."""
from __future__ import annotations
import secrets
import pytest
from decnet.orchestrator import scheduler
def _decky(uuid: str, name: str, ip: str | None, services: list[str] | str):
return {"uuid": uuid, "name": name, "ip": ip, "services": services}
def test_pick_returns_none_when_no_ssh_deckies():
    """``pick`` returns ``None`` when none of the deckies list ``ssh``."""
    non_ssh_fleet = [
        _decky(uuid, name, ip, [service])
        for uuid, name, ip, service in (
            ("u1", "decky-01", "10.0.0.1", "http"),
            ("u2", "decky-02", "10.0.0.2", "smb"),
        )
    ]
    outcome = scheduler.pick(non_ssh_fleet)
    assert outcome is None
def test_pick_returns_none_when_ssh_decky_has_no_ip():
    """An SSH decky whose ``ip`` is ``None`` must not be picked."""
    addressless = _decky("u1", "decky-01", None, ["ssh"])
    outcome = scheduler.pick([addressless])
    assert outcome is None
def test_pick_file_action_with_single_ssh_decky():
    """A lone SSH decky can only receive a file action.

    The picker is handed an explicit ``SystemRandom`` through its ``rand``
    parameter. No ``seed`` override is needed: ``SystemRandom.seed`` is
    already a do-nothing stub, so replacing it on the instance was dead
    code (and narrower than the stub, which also accepts keyword args).
    """
    deckies = [_decky("u1", "decky-01", "10.0.0.1", ["ssh"])]
    rng = secrets.SystemRandom()

    action = scheduler.pick(deckies, rand=rng)

    assert isinstance(action, scheduler.FileAction)
    assert action.dst_uuid == "u1"
    # The dropped file must have an absolute path and non-empty content.
    assert action.path.startswith("/")
    assert action.content
def test_pick_traffic_or_file_with_two_ssh_deckies():
    """With two SSH deckies, repeated picks produce both action kinds.

    Every traffic action must connect two different deckies, target one of
    the known IPs, and use the ``ssh`` protocol.
    """
    fleet = [
        _decky("u1", "decky-01", "10.0.0.1", ["ssh"]),
        _decky("u2", "decky-02", "10.0.0.2", ["ssh"]),
    ]
    known_ips = {"10.0.0.1", "10.0.0.2"}
    seen_kinds: set[str] = set()

    # Given the intended 50/50 split, 40 draws make it practically certain
    # that both kinds show up at least once.
    for _ in range(40):
        action = scheduler.pick(fleet)
        assert action is not None
        if not isinstance(action, scheduler.TrafficAction):
            seen_kinds.add("file")
            continue
        seen_kinds.add("traffic")
        assert action.src_uuid != action.dst_uuid
        assert action.dst_ip in known_ips
        assert action.protocol == "ssh"

    assert seen_kinds == {"traffic", "file"}
def test_pick_skips_non_deserialised_services():
    """A decky whose ``services`` is still a raw JSON string is not picked."""
    raw_services = '["ssh"]'
    undecoded = _decky("u1", "decky-01", "10.0.0.1", raw_services)
    outcome = scheduler.pick([undecoded])
    assert outcome is None

View File

@@ -0,0 +1,99 @@
"""Driver tests with the docker subprocess mocked.
We don't need a real Docker daemon to validate the driver's contract:
it boils down to "build an argv, call _run, classify the result". A
dependency-injected ``_run`` keeps the tests hermetic.
"""
from __future__ import annotations
import pytest
from decnet.orchestrator.drivers import ssh as ssh_driver
from decnet.orchestrator.drivers.base import ActivityResult
from decnet.orchestrator.scheduler import FileAction, TrafficAction
@pytest.mark.asyncio
async def test_traffic_success_classifies_on_ssh_banner(monkeypatch):
    """Exit code 0 plus an SSH banner on stdout is reported as success.

    Also pins the argv shape: the command runs via ``docker exec`` in the
    source decky's ``-ssh`` container and ends with the destination IP.
    """
    recorded_calls: list[list[str]] = []

    async def fake_run(argv):
        # Stand-in for the docker subprocess: remember argv, reply with a banner.
        recorded_calls.append(argv)
        return 0, "SSH-2.0-OpenSSH_9.6\r\n", ""

    monkeypatch.setattr(ssh_driver, "_run", fake_run)

    driver = ssh_driver.SSHDriver()
    probe = TrafficAction(
        src_uuid="u1",
        src_name="decky-01",
        dst_uuid="u2",
        dst_name="decky-02",
        dst_ip="10.0.0.2",
    )
    result = await driver.run(probe)

    assert isinstance(result, ActivityResult)
    assert result.success is True
    assert result.payload["banner"].startswith("SSH-2.0-OpenSSH")

    first_argv = recorded_calls[0]
    assert first_argv[:3] == ["docker", "exec", "decky-01-ssh"]
    assert first_argv[-1] == "10.0.0.2"
@pytest.mark.asyncio
async def test_traffic_failure_when_banner_missing(monkeypatch):
    """A non-zero exit with no banner is reported as failure, with rc and stderr in the payload."""

    async def fake_run(argv):
        # Simulate the probe failing to reach the destination.
        return 1, "", "Connection refused"

    monkeypatch.setattr(ssh_driver, "_run", fake_run)

    driver = ssh_driver.SSHDriver()
    probe = TrafficAction(
        src_uuid="u1",
        src_name="decky-01",
        dst_uuid="u2",
        dst_name="decky-02",
        dst_ip="10.0.0.2",
    )
    result = await driver.run(probe)

    assert result.success is False
    payload = result.payload
    assert payload["rc"] == 1
    assert "Connection refused" in payload["stderr"]
@pytest.mark.asyncio
async def test_file_action_invokes_docker_exec_on_dst(monkeypatch):
    """A file action runs ``docker exec <dst>-ssh sh -c <script>`` on the destination.

    Checks the reported byte count and that the shell script embeds the
    content in a shell-safe (single-quoted) form.
    """
    file_body = "session=1700000000\n"
    target_path = "/tmp/.cache-1700000000.tmp"
    seen_argvs: list[list[str]] = []

    async def fake_run(argv):
        # Stand-in for the docker subprocess: remember argv, report success.
        seen_argvs.append(argv)
        return 0, "", ""

    monkeypatch.setattr(ssh_driver, "_run", fake_run)

    driver = ssh_driver.SSHDriver()
    action = FileAction(
        dst_uuid="u2",
        dst_name="decky-02",
        path=target_path,
        content=file_body,
    )
    result = await driver.run(action)

    assert result.success is True
    assert result.payload["bytes"] == len(file_body.encode())

    argv = seen_argvs[0]
    assert argv[:5] == ["docker", "exec", "decky-02-ssh", "sh", "-c"]

    # Injection-safety contract for the shell payload: the content must be
    # single-quoted so ``;`` or ``$`` in it can never be interpreted. The
    # path used here contains only shell-safe characters, so it is expected
    # to appear verbatim (presumably via shlex.quote, which leaves such
    # strings bare).
    script = argv[5]
    assert target_path in script
    assert "'session=1700000000\n'" in script
    assert "mkdir -p /tmp" in script
@pytest.mark.asyncio
async def test_run_handles_missing_docker_binary(monkeypatch):
    """``_run`` turns a missing ``docker`` executable into rc 127 with a "not found" message."""

    async def fake_create(*args, **kwargs):
        # Behave as if the executable could not be located.
        raise FileNotFoundError("docker")

    monkeypatch.setattr("asyncio.create_subprocess_exec", fake_create)

    exit_code, _stdout, stderr_text = await ssh_driver._run(
        ["docker", "exec", "x", "true"]
    )

    assert exit_code == 127
    assert "not found" in stderr_text

View File

@@ -0,0 +1,123 @@
"""End-to-end-ish: run one orchestrator tick against a real SQLite repo +
FakeBus, with the docker subprocess stubbed. Verifies that:
* :func:`scheduler.pick` reads the deckies the repo returns,
* the driver result is persisted to ``orchestrator_events``,
* a bus event is published to the right topic.
"""
from __future__ import annotations
import json
import pytest
import pytest_asyncio
from decnet.bus.fake import FakeBus
from decnet.orchestrator import worker as orch_worker
from decnet.orchestrator.drivers import ssh as ssh_driver
from decnet.web.db.models import TopologyDecky, Topology
from decnet.web.db.sqlite.repository import SQLiteRepository
@pytest_asyncio.fixture
async def repo(tmp_path):
    """Yield an initialised ``SQLiteRepository`` backed by a per-test database file."""
    db_file = tmp_path / "decnet.db"
    repository = SQLiteRepository(db_path=str(db_file))
    await repository.initialize()
    yield repository
    # Teardown: dispose of the engine once the test is done.
    await repository.engine.dispose()
@pytest_asyncio.fixture
async def fake_bus():
    """Yield a connected ``FakeBus`` and close it when the test is done."""
    in_memory_bus = FakeBus()
    await in_memory_bus.connect()
    try:
        yield in_memory_bus
    finally:
        await in_memory_bus.close()
async def _seed_two_running_ssh_deckies(repo: SQLiteRepository) -> tuple[str, str]:
    """Insert one active topology with two running SSH deckies.

    Returns the UUIDs of ``decky-01`` and ``decky-02``, in that order.
    """
    async with repo._session() as session:
        topology = Topology(name="t1", config_snapshot="{}", status="active")
        session.add(topology)
        await session.commit()
        # Refresh so the topology's generated id is available below.
        await session.refresh(topology)

        deckies = [
            TopologyDecky(
                topology_id=topology.id,
                name=decky_name,
                services=json.dumps(["ssh"]),
                ip=decky_ip,
                state="running",
            )
            for decky_name, decky_ip in (
                ("decky-01", "10.0.0.1"),
                ("decky-02", "10.0.0.2"),
            )
        ]
        for decky in deckies:
            session.add(decky)
        await session.commit()
        for decky in deckies:
            await session.refresh(decky)

        first, second = deckies
        return first.uuid, second.uuid
@pytest.mark.asyncio
async def test_one_tick_records_event_and_publishes(repo, fake_bus, monkeypatch):
    """One tick persists exactly one event row and publishes one bus event.

    The docker subprocess is stubbed so that either action kind the
    scheduler picks (traffic or file) succeeds; the assertions therefore
    do not depend on which one was chosen.
    """
    import asyncio
    import contextlib

    await _seed_two_running_ssh_deckies(repo)

    async def fake_run(argv):
        # Calls with ``python3`` at argv[3] get an SSH banner back; every
        # other call simply succeeds with empty output.
        if argv[3] == "python3":
            return 0, "SSH-2.0-OpenSSH_9.6\r\n", ""
        return 0, "", ""

    monkeypatch.setattr(ssh_driver, "_run", fake_run)

    received: list = []

    async def collect():
        async with fake_bus.subscribe("orchestrator.>") as sub:
            async for ev in sub:
                received.append(ev)
                # A single event is all this test waits for.
                return

    collector = asyncio.create_task(collect())
    try:
        # Yield once so the subscription is registered before we publish.
        await asyncio.sleep(0)

        driver = ssh_driver.SSHDriver()
        await orch_worker._one_tick(repo, driver, fake_bus)
        await asyncio.wait_for(collector, timeout=2.0)
    finally:
        # If the tick raised, the collector would otherwise be left pending
        # on the subscription; cancel it so no task outlives the test.
        if not collector.done():
            collector.cancel()
            with contextlib.suppress(asyncio.CancelledError):
                await collector

    rows = await repo.list_orchestrator_events(limit=10)
    assert len(rows) == 1
    row = rows[0]
    assert row["success"] is True
    assert row["protocol"] == "ssh"
    assert row["kind"] in {"traffic", "file"}

    assert len(received) == 1
    ev = received[0]
    assert ev.topic.startswith("orchestrator.")
    assert ev.payload["success"] is True
    # The published event and the persisted row must agree on the kind.
    assert ev.payload["kind"] == row["kind"]
@pytest.mark.asyncio
async def test_tick_is_noop_when_no_running_deckies(repo, fake_bus, monkeypatch):
    """With an empty repo, a tick never calls the driver's ``_run`` and records nothing."""
    invocations: list = []

    async def fake_run(argv):
        # Any call at all is a failure for this test; record it.
        invocations.append(argv)
        return 0, "SSH-2.0-foo", ""

    monkeypatch.setattr(ssh_driver, "_run", fake_run)

    driver = ssh_driver.SSHDriver()
    await orch_worker._one_tick(repo, driver, fake_bus)

    assert invocations == []
    recorded = await repo.list_orchestrator_events(limit=10)
    assert recorded == []