merge: testing → main (reconcile 2-week divergence)

This commit is contained in:
2026-04-28 18:36:00 -04:00
parent 499836c9e4
commit 862e4dbb31
1235 changed files with 160255 additions and 7996 deletions

View File

@@ -20,7 +20,7 @@ from pathlib import Path
import pytest
_REPO_ROOT = Path(__file__).parent.parent.parent
_TEMPLATES = _REPO_ROOT / "templates"
_TEMPLATES = _REPO_ROOT / "decnet" / "templates"
# Prefer the project venv's Python (has Flask, Twisted, etc.) over system Python
_VENV_PYTHON = _REPO_ROOT / ".venv" / "bin" / "python"
@@ -30,6 +30,13 @@ _PYTHON = str(_VENV_PYTHON) if _VENV_PYTHON.exists() else sys.executable
# Use search (not match) so lines prefixed by Twisted timestamps are handled.
_RFC5424_RE = re.compile(r"<\d+>1 \S+ \S+ \S+ - \S+ ")
def _mysql_available(
    host: str = "127.0.0.1", port: int = 3307, timeout: float = 1.0
) -> bool:
    """Return True when something accepts TCP connections on ``host:port``.

    This is only a reachability probe: it opens a TCP connection and closes
    it again without speaking the MySQL protocol. The defaults keep the
    original behaviour (127.0.0.1:3307, one-second timeout).

    Args:
        host: Address to probe.
        port: TCP port to probe.
        timeout: Seconds to wait for the connection attempt.

    Returns:
        True if the connection succeeded, False on any ``OSError``
        (refused, unreachable, timed out).
    """
    try:
        # The context manager guarantees the probe socket is closed.
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except OSError:
        return False
def _free_port() -> int:
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
@@ -90,7 +97,7 @@ def assert_rfc5424(
class _ServiceProcess:
"""Manages a live service subprocess and its stdout log queue."""
def __init__(self, service: str, port: int):
def __init__(self, service: str, port: int, extra_env: dict | None = None):
template_dir = _TEMPLATES / service
env = {
**os.environ,
@@ -99,6 +106,8 @@ class _ServiceProcess:
"PYTHONPATH": str(template_dir),
"LOG_TARGET": "",
}
if extra_env:
env.update(extra_env)
self._proc = subprocess.Popen(
[_PYTHON, str(template_dir / "server.py")],
cwd=str(template_dir),
@@ -143,9 +152,9 @@ def live_service() -> Generator:
"""
started: list[_ServiceProcess] = []
def _start(service: str) -> tuple[int, callable]:
def _start(service: str, env: dict | None = None) -> tuple[int, callable]:
port = _free_port()
svc = _ServiceProcess(service, port)
svc = _ServiceProcess(service, port, extra_env=env)
started.append(svc)
if not _wait_for_port(port):
svc.stop()

View File

@@ -0,0 +1,248 @@
"""
Live health endpoint tests.
Starts the real FastAPI application via ASGI transport with background workers
disabled (DECNET_CONTRACT_TEST=true). Validates the /health endpoint reports
accurate component status against real system state — no mocks.
Run: pytest -m live tests/live/test_health_live.py -v
"""
import asyncio
import os
from unittest.mock import MagicMock
import httpx
import pytest
# Must be set before any decnet import
os.environ.setdefault("DECNET_JWT_SECRET", "test-secret-key-at-least-32-chars-long!!")
os.environ.setdefault("DECNET_ADMIN_PASSWORD", "test-password-123")
os.environ["DECNET_CONTRACT_TEST"] = "true"
from decnet.web.api import app, get_background_tasks # noqa: E402
from decnet.web.dependencies import repo # noqa: E402
from decnet.web.db.models import User # noqa: E402
from decnet.web.auth import get_password_hash # noqa: E402
from decnet.env import DECNET_ADMIN_USER, DECNET_ADMIN_PASSWORD # noqa: E402
from sqlmodel import SQLModel # noqa: E402
from sqlalchemy import select # noqa: E402
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine # noqa: E402
from sqlalchemy.pool import StaticPool # noqa: E402
import uuid as _uuid # noqa: E402
@pytest.fixture(scope="module")
def event_loop():
    """Yield one freshly created asyncio loop for the module and close it afterwards."""
    module_loop = asyncio.new_event_loop()
    yield module_loop
    module_loop.close()
@pytest.fixture(scope="module", autouse=True)
async def live_db():
    """Back the shared ``repo`` with an in-memory SQLite database for this module.

    Creates the engine and session factory, installs both on ``repo``,
    creates every table known to ``SQLModel.metadata`` and inserts the admin
    user when it is not present yet. The engine is disposed on teardown.
    """
    mem_engine = create_async_engine(
        "sqlite+aiosqlite:///:memory:",
        poolclass=StaticPool,
        connect_args={"check_same_thread": False},
    )
    make_session = async_sessionmaker(
        mem_engine, class_=AsyncSession, expire_on_commit=False
    )

    # Redirect the application's repository to the throwaway database.
    repo.engine = mem_engine
    repo.session_factory = make_session

    async with mem_engine.begin() as conn:
        await conn.run_sync(SQLModel.metadata.create_all)

    async with make_session() as session:
        admin_lookup = select(User).where(User.username == DECNET_ADMIN_USER)
        found = (await session.execute(admin_lookup)).scalar_one_or_none()
        if not found:
            admin = User(
                uuid=str(_uuid.uuid4()),
                username=DECNET_ADMIN_USER,
                password_hash=get_password_hash(DECNET_ADMIN_PASSWORD),
                role="admin",
                must_change_password=False,
            )
            session.add(admin)
            await session.commit()

    yield
    await mem_engine.dispose()
@pytest.fixture(scope="module")
async def live_client(live_db):
    """Yield an ``httpx.AsyncClient`` that calls ``app`` in-process via ASGI transport."""
    in_process = httpx.ASGITransport(app=app)
    async with httpx.AsyncClient(transport=in_process, base_url="http://test") as client:
        yield client
@pytest.fixture(scope="module")
async def token(live_client):
    """Log in once per module as the configured admin and return the access token.

    Fails with the HTTP status and response body when the login response
    carries no ``access_token``, instead of surfacing a bare ``KeyError``
    in every dependent test.
    """
    resp = await live_client.post("/api/v1/auth/login", json={
        "username": DECNET_ADMIN_USER,
        "password": DECNET_ADMIN_PASSWORD,
    })
    payload = resp.json()
    assert "access_token" in payload, (
        f"admin login returned no access_token (HTTP {resp.status_code}): {resp.text}"
    )
    return payload["access_token"]
# ─── Tests ───────────────────────────────────────────────────────────────────
@pytest.mark.live
class TestHealthLive:
    """Live integration tests — real DB, real Docker check, real task state."""

    _HEALTH = "/api/v1/health"
    # Module-level task attributes on decnet.web.api that the tests swap out.
    _TASK_ATTRS = ("ingestion_task", "collector_task", "attacker_task", "sniffer_task")

    @staticmethod
    def _bearer(token):
        """Build the Authorization header for *token*."""
        return {"Authorization": f"Bearer {token}"}

    async def _fetch_health(self, client, token):
        """GET the health endpoint with a bearer token and return the response."""
        return await client.get(self._HEALTH, headers=self._bearer(token))

    async def test_endpoint_reachable_and_authenticated(self, live_client, token):
        """Health endpoint exists and enforces auth."""
        anonymous = await live_client.get("/api/v1/health")
        assert anonymous.status_code == 401

        authed = await self._fetch_health(live_client, token)
        assert authed.status_code in (200, 503)

    async def test_response_contains_all_components(self, live_client, token):
        """Every expected component appears in the response."""
        body = (await self._fetch_health(live_client, token)).json()
        wanted = {
            "database",
            "ingestion_worker",
            "collector_worker",
            "attacker_worker",
            "sniffer_worker",
            "docker",
        }
        assert set(body["components"].keys()) == wanted

    async def test_database_healthy_with_real_db(self, live_client, token):
        """With a real (in-memory) SQLite, database component should be ok."""
        body = (await self._fetch_health(live_client, token)).json()
        assert body["components"]["database"]["status"] == "ok"

    async def test_workers_report_not_started_in_contract_mode(self, live_client, token):
        """In contract-test mode workers are skipped, so they report failing."""
        body = (await self._fetch_health(live_client, token)).json()
        for worker in ("ingestion_worker", "collector_worker", "attacker_worker"):
            component = body["components"][worker]
            assert component["status"] == "failing", f"{worker} should be failing"
            assert component["detail"] is not None

    async def test_overall_status_reflects_worker_state(self, live_client, token):
        """With workers not started, overall status should be unhealthy (503)."""
        resp = await self._fetch_health(live_client, token)
        assert resp.status_code == 503
        assert resp.json()["status"] == "unhealthy"

    async def test_docker_component_reports_real_state(self, live_client, token):
        """Docker component reflects whether Docker daemon is actually reachable."""
        resp = await self._fetch_health(live_client, token)
        docker_state = resp.json()["components"]["docker"]
        # Either outcome is acceptable; a failure must come with an explanation.
        assert docker_state["status"] in ("ok", "failing")
        if docker_state["status"] == "failing":
            assert docker_state["detail"] is not None

    async def test_component_status_values_are_valid(self, live_client, token):
        """Every component status is either 'ok' or 'failing'."""
        resp = await self._fetch_health(live_client, token)
        for name, component in resp.json()["components"].items():
            assert component["status"] in ("ok", "failing"), f"{name} has invalid status"

    async def test_status_transitions_with_simulated_recovery(self, live_client, token):
        """Simulate workers coming alive and verify status improves."""
        import decnet.web.api as api_mod

        # Remember the real task objects so they can be put back afterwards.
        saved = {attr: getattr(api_mod, attr) for attr in self._TASK_ATTRS}
        try:
            # Install stand-in tasks that report "still running".
            for attr in self._TASK_ATTRS:
                running = MagicMock(spec=asyncio.Task)
                running.done.return_value = False
                setattr(api_mod, attr, running)

            resp = await self._fetch_health(live_client, token)
            body = resp.json()
            # Only the worker components are checked; overall status also depends on docker.
            for worker in (
                "ingestion_worker",
                "collector_worker",
                "attacker_worker",
                "sniffer_worker",
            ):
                assert body["components"][worker]["status"] == "ok"
        finally:
            for attr, task in saved.items():
                setattr(api_mod, attr, task)

    async def test_degraded_when_only_sniffer_fails(self, live_client, token):
        """If only the sniffer is down but everything else is up, status is degraded."""
        import decnet.web.api as api_mod

        saved = {attr: getattr(api_mod, attr) for attr in self._TASK_ATTRS}
        try:
            # Required workers look alive...
            for attr in ("ingestion_task", "collector_task", "attacker_task"):
                running = MagicMock(spec=asyncio.Task)
                running.done.return_value = False
                setattr(api_mod, attr, running)
            # ...while the sniffer is explicitly absent.
            api_mod.sniffer_task = None

            resp = await self._fetch_health(live_client, token)
            body = resp.json()
            # The expected overall status depends on whether docker is reachable
            # on this host, so both outcomes are accepted.
            docker_ok = body["components"]["docker"]["status"] == "ok"
            if docker_ok:
                assert body["status"] == "degraded"
                assert resp.status_code == 200
            else:
                assert body["status"] == "unhealthy"
            assert body["components"]["sniffer_worker"]["status"] == "failing"
        finally:
            for attr, task in saved.items():
                setattr(api_mod, attr, task)

View File

@@ -0,0 +1,190 @@
import os
import queue
import socket
import ssl
import subprocess
import sys
import tempfile
import threading
import time
from pathlib import Path
import pytest
import requests
from urllib3.exceptions import InsecureRequestWarning
from tests.live.conftest import assert_rfc5424
_REPO_ROOT = Path(__file__).parent.parent.parent
_TEMPLATES = _REPO_ROOT / "decnet" / "templates"
_VENV_PYTHON = _REPO_ROOT / ".venv" / "bin" / "python"
_PYTHON = str(_VENV_PYTHON) if _VENV_PYTHON.exists() else sys.executable
def _free_port() -> int:
    """Bind a loopback TCP socket to port 0 and return the port the OS assigned.

    The socket is closed before returning, so the port is free at that moment
    but not reserved for the caller.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    with probe:
        probe.bind(("127.0.0.1", 0))
        _host, assigned_port = probe.getsockname()
    return assigned_port
def _wait_for_tls_port(port: int, timeout: float = 10.0) -> bool:
    """Poll 127.0.0.1:``port`` until a TLS handshake succeeds or ``timeout`` elapses.

    Certificate and hostname verification are disabled, so any certificate
    the server presents is accepted.

    Args:
        port: TCP port to connect to on loopback.
        timeout: Total seconds to keep retrying.

    Returns:
        True as soon as one handshake completes, False if the deadline passes.
    """
    # The context does not change between attempts, so build it once rather
    # than on every retry.
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE

    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with socket.create_connection(("127.0.0.1", port), timeout=0.5) as sock:
                with ctx.wrap_socket(sock, server_hostname="127.0.0.1"):
                    return True
        except (OSError, ssl.SSLError):
            # Not listening yet, or handshake failed: back off briefly and retry.
            time.sleep(0.1)
    return False
def _drain(q: queue.Queue, timeout: float = 2.0) -> list[str]:
    """Collect items from ``q`` until it stays empty through the end of ``timeout``.

    Each ``get`` waits for whatever time is left before the deadline (never
    less than 0.01 s); collection stops when a ``get`` times out or the
    deadline has passed.
    """
    collected: list[str] = []
    stop_at = time.monotonic() + timeout
    while True:
        remaining = stop_at - time.monotonic()
        if remaining <= 0:
            break
        try:
            item = q.get(timeout=max(0.01, remaining))
        except queue.Empty:
            break
        collected.append(item)
    return collected
def _generate_self_signed_cert(cert_path: str, key_path: str) -> None:
    """Run the ``openssl`` CLI to write a self-signed certificate and its key.

    The command requests an unencrypted RSA-2048 key and a certificate for
    ``CN=localhost`` valid for one day. Output is captured; a non-zero exit
    raises ``subprocess.CalledProcessError``.
    """
    openssl_cmd = [
        "openssl", "req", "-x509", "-newkey", "rsa:2048", "-nodes",
        "-keyout", key_path, "-out", cert_path,
        "-days", "1", "-subj", "/CN=localhost",
    ]
    subprocess.run(openssl_cmd, capture_output=True, check=True)
class _HTTPSServiceProcess:
    """Runs the ``https`` template's ``server.py`` as a subprocess and queues its output.

    The child's stdout (with stderr merged in) is read line by line on a
    daemon thread and pushed onto an internal queue that ``drain`` empties.
    """

    def __init__(self, port: int, cert_path: str, key_path: str):
        """Launch the server.

        Args:
            port: Value exported to the child as ``PORT``.
            cert_path: Value exported to the child as ``TLS_CERT``.
            key_path: Value exported to the child as ``TLS_KEY``.
        """
        template_dir = _TEMPLATES / "https"
        env = {
            **os.environ,
            "NODE_NAME": "test-node",
            "PORT": str(port),
            "PYTHONPATH": str(template_dir),
            "LOG_TARGET": "",
            "TLS_CERT": cert_path,
            "TLS_KEY": key_path,
        }
        self._proc = subprocess.Popen(
            [_PYTHON, str(template_dir / "server.py")],
            cwd=str(template_dir),
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            env=env,
            text=True,
        )
        self._q: queue.Queue = queue.Queue()
        self._reader = threading.Thread(target=self._read_loop, daemon=True)
        self._reader.start()

    def _read_loop(self) -> None:
        """Forward each output line (trailing newline stripped) to the queue until EOF."""
        assert self._proc.stdout is not None
        for line in self._proc.stdout:
            self._q.put(line.rstrip("\n"))

    def drain(self, timeout: float = 2.0) -> list[str]:
        """Return the output lines queued so far, waiting up to ``timeout`` seconds."""
        return _drain(self._q, timeout)

    def stop(self) -> None:
        """Terminate the child (kill after 3 s) and release the reader thread and pipe.

        Safe to call more than once.
        """
        self._proc.terminate()
        try:
            self._proc.wait(timeout=3)
        except subprocess.TimeoutExpired:
            self._proc.kill()
            self._proc.wait()
        # Once the child has exited the reader normally sees EOF and finishes.
        self._reader.join(timeout=1)
        # Close the pipe only after the reader is done: closing it while the
        # thread is still blocked in a read could hang.
        if not self._reader.is_alive() and self._proc.stdout is not None:
            self._proc.stdout.close()
@pytest.fixture
def https_service():
    """Yield a factory that starts an HTTPS server with a throwaway self-signed cert.

    Each call to the factory returns ``(port, drain)``. Every server started
    through it is stopped, and every temporary certificate directory removed,
    when the test finishes.
    """
    running: list[_HTTPSServiceProcess] = []
    cert_dirs: list[tempfile.TemporaryDirectory] = []

    def _start() -> tuple[int, callable]:
        port = _free_port()

        workdir = tempfile.TemporaryDirectory()
        cert_dirs.append(workdir)
        cert_path = os.path.join(workdir.name, "cert.pem")
        key_path = os.path.join(workdir.name, "key.pem")
        _generate_self_signed_cert(cert_path, key_path)

        service = _HTTPSServiceProcess(port, cert_path, key_path)
        running.append(service)

        tls_ready = _wait_for_tls_port(port)
        if not tls_ready:
            service.stop()
            pytest.fail(f"HTTPS service did not bind to port {port} within 10s")

        # Discard whatever the server printed while starting up.
        service.drain(timeout=0.3)
        return port, service.drain

    yield _start

    for service in running:
        service.stop()
    for workdir in cert_dirs:
        workdir.cleanup()
@pytest.mark.live
class TestHTTPSLive:
    """Live checks against the HTTPS service started by the ``https_service`` fixture."""

    def test_get_request_logged(self, https_service):
        """GET /admin answers 403 and produces an RFC 5424 'request' log line."""
        port, drain = https_service()
        url = f"https://127.0.0.1:{port}/admin"
        resp = requests.get(url, timeout=5, verify=False)
        assert resp.status_code == 403
        assert_rfc5424(drain(), service="https", event_type="request")

    def test_tls_handshake(self, https_service):
        """A raw TLS handshake completes and negotiates a protocol version."""
        port, _ = https_service()
        ctx = ssl.create_default_context()
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE
        with socket.create_connection(("127.0.0.1", port), timeout=5) as raw:
            with ctx.wrap_socket(raw, server_hostname="127.0.0.1") as tls:
                negotiated = tls.version()
                assert negotiated is not None

    def test_server_header_set(self, https_service):
        """Responses carry a non-empty Server header."""
        port, _ = https_service()
        url = f"https://127.0.0.1:{port}/"
        resp = requests.get(url, timeout=5, verify=False)
        assert "Server" in resp.headers
        assert resp.headers["Server"] != ""

    def test_post_body_logged(self, https_service):
        """A POSTed form body shows up as 'body=' in a request log line."""
        port, drain = https_service()
        requests.post(
            f"https://127.0.0.1:{port}/login",
            data={"username": "admin", "password": "secret"},
            timeout=5,
            verify=False,
        )
        lines = drain()
        request_lines = [entry for entry in lines if "request" in entry]
        assert any("body=" in entry for entry in request_lines), (
            "Expected 'body=' in request log line. Got:\n" + "\n".join(lines[:10])
        )

    def test_method_and_path_in_log(self, https_service):
        """The request log line names both the HTTP method and the requested path."""
        port, drain = https_service()
        url = f"https://127.0.0.1:{port}/secret/file.txt"
        requests.get(url, timeout=5, verify=False)
        matched = assert_rfc5424(drain(), service="https", event_type="request")
        assert "GET" in matched or 'method="GET"' in matched
        assert "/secret/file.txt" in matched or 'path="/secret/file.txt"' in matched

View File

@@ -60,7 +60,7 @@ class TestIMAPLive:
pass
lines += drain()
matched = assert_rfc5424(lines, service="imap", event_type="auth")
assert "failed" in matched, f"Expected auth failure in log. Got:\n{matched!r}"
assert "failure" in matched, f"Expected auth failure in log. Got:\n{matched!r}"
def test_select_inbox_after_login(self, live_service):
port, drain = live_service("imap")

View File

@@ -9,7 +9,11 @@ from tests.live.conftest import assert_rfc5424
@pytest.mark.live
class TestMQTTLive:
def test_connect_accepted(self, live_service):
port, drain = live_service("mqtt")
# The honeypot defaults to auth-required (post-2018 realistic
# broker posture). Opt into accept-all mode to exercise the
# happy-path CONNACK rc=0 code path. See decnet/templates/mqtt/
# server.py::MQTT_ACCEPT_ALL.
port, drain = live_service("mqtt", env={"MQTT_ACCEPT_ALL": "1"})
connected = []
client = mqtt.Client(client_id="test-scanner")
client.on_connect = lambda c, u, f, rc: connected.append(rc)
@@ -48,7 +52,9 @@ class TestMQTTLive:
)
def test_subscribe_logged(self, live_service):
port, drain = live_service("mqtt")
# SUBSCRIBE is gated on successful auth — accept-all lets the test
# reach the subscribe path without planting credentials.
port, drain = live_service("mqtt", env={"MQTT_ACCEPT_ALL": "1"})
subscribed = []
client = mqtt.Client(client_id="sub-test")
client.on_subscribe = lambda c, u, mid, qos: subscribed.append(mid)

View File

@@ -0,0 +1,219 @@
"""
Live integration tests for the MySQL dashboard backend.
Requires a real MySQL server. Skipped unless ``DECNET_DB_URL`` (or
``DECNET_MYSQL_TEST_URL``) is exported pointing at a running instance,
e.g. a throw-away docker container:
docker run -d --rm --name decnet-mysql-test \
-e MYSQL_ROOT_PASSWORD=root -e MYSQL_DATABASE=decnet \
-e MYSQL_USER=decnet -e MYSQL_PASSWORD=decnet \
-p 3307:3306 mysql:8
# Either url works; the connecting account MUST have CREATE/DROP DATABASE
# privilege because each xdist worker uses its own throwaway schema.
export DECNET_DB_URL='mysql+aiomysql://root:root@127.0.0.1:3307/decnet'
pytest -m live tests/live/test_mysql_backend_live.py
Each worker creates ``test_decnet_<worker>`` on session start and drops it
on session end. ``<worker>`` is ``master`` outside xdist, ``gw0``/``gw1``/…
under it, so parallel runs never clash.
"""
from __future__ import annotations
import json
import os
import uuid as _uuid
from datetime import datetime, timedelta, timezone
from urllib.parse import urlparse, urlunparse
import pytest
from sqlalchemy import text
from sqlalchemy.ext.asyncio import create_async_engine
from tests.live.conftest import _mysql_available
from decnet.web.db.mysql.repository import MySQLRepository
# NOTE(review): this URL is hard-coded. Nothing visible in this module reads
# DECNET_DB_URL or DECNET_MYSQL_TEST_URL, although the module docstring and
# the skip reason below both mention them — confirm which behaviour is intended.
LIVE_URL = "mysql+asyncmy://root:root@127.0.0.1:3307/decnet"
pytestmark = [
    pytest.mark.live,
    # Pin every test in this module to the module-scoped event loop. The
    # module-scoped ``mysql_test_db_url`` fixture (and transitively the
    # asyncmy connection pool it seeds) is bound to that loop; running the
    # tests on their own per-function loops trips pytest-asyncio's
    # "Future attached to a different loop" guard the instant the repo
    # reuses a pooled connection.
    pytest.mark.asyncio(loop_scope="module"),
    # NOTE(review): with LIVE_URL being a constant that starts with "mysql",
    # this condition is always False, so this marker never skips anything.
    pytest.mark.skipif(
        not (LIVE_URL and LIVE_URL.startswith("mysql")),
        reason="Set DECNET_DB_URL=mysql+aiomysql://... to run MySQL live tests",
    ),
    # Evaluated at import time: skips the module when the conftest TCP probe
    # (_mysql_available) cannot connect.
    pytest.mark.skipif(
        not _mysql_available(),
        reason="MySQL not available on 127.0.0.1:3307"
    )
]
def _worker_id() -> str:
    """Return the value of ``PYTEST_XDIST_WORKER``, or ``master`` when it is unset."""
    return os.getenv("PYTEST_XDIST_WORKER", "master")
def _split_url(url: str) -> tuple[str, str]:
    """Return ``(url with its path removed, per-worker test database name)``.

    The database name is ``test_decnet_<worker id>``; it does not depend on
    the path component of ``url``.
    """
    without_db = urlparse(url)._replace(path="")
    return urlunparse(without_db), f"test_decnet_{_worker_id()}"
def _url_with_db(server_url: str, db_name: str) -> str:
    """Return ``server_url`` with its path replaced by ``/<db_name>``."""
    parts = urlparse(server_url)
    with_db = parts._replace(path=f"/{db_name}")
    return urlunparse(with_db)
@pytest.fixture(scope="module")
async def mysql_test_db_url():
    """Create a per-worker throwaway database, yield its URL, drop it on teardown.

    The statements run through a short-lived AUTOCOMMIT engine built from
    ``LIVE_URL`` with the database path removed, so the account in that URL
    needs CREATE/DROP DATABASE privilege. If it lacks it, the SQL error will
    say so; one way to grant it::

        GRANT ALL PRIVILEGES ON `test\\_decnet\\_%`.* TO 'decnet'@'%';
    """
    server_url, db_name = _split_url(LIVE_URL)

    async def _run_as_admin(*statements: str) -> None:
        # Fresh engine per call, always disposed, even when a statement fails.
        admin_engine = create_async_engine(server_url, isolation_level="AUTOCOMMIT")
        try:
            async with admin_engine.connect() as conn:
                for statement in statements:
                    await conn.execute(text(statement))
        finally:
            await admin_engine.dispose()

    # Start from a clean slate in case a previous run left the schema behind.
    await _run_as_admin(
        f"DROP DATABASE IF EXISTS `{db_name}`",
        f"CREATE DATABASE `{db_name}`",
    )

    yield _url_with_db(server_url, db_name)

    # Teardown: remove the throwaway schema again.
    await _run_as_admin(f"DROP DATABASE IF EXISTS `{db_name}`")
@pytest.fixture
async def mysql_repo(mysql_test_db_url):
    """Yield an initialized ``MySQLRepository`` and empty its tables after each test.

    Teardown truncates the listed tables with foreign-key checks switched off
    so the order does not matter. The engine is disposed in a ``finally`` so
    its connections are released even when the truncate step fails.
    """
    repo = MySQLRepository(url=mysql_test_db_url)
    await repo.initialize()
    yield repo
    try:
        async with repo.engine.begin() as conn:
            await conn.execute(text("SET FOREIGN_KEY_CHECKS = 0"))
            for tbl in ("attacker_behavior", "attackers", "logs", "bounty", "state", "users"):
                await conn.execute(text(f"TRUNCATE TABLE `{tbl}`"))
            await conn.execute(text("SET FOREIGN_KEY_CHECKS = 1"))
    finally:
        await repo.engine.dispose()
async def test_schema_creation_and_admin_seed(mysql_repo):
    """After initialization the admin user can be looked up and has the admin role."""
    admin_name = os.environ.get("DECNET_ADMIN_USER", "admin")
    admin = await mysql_repo.get_user_by_username(admin_name)
    assert admin is not None
    assert admin["role"] == "admin"
async def test_add_and_query_logs(mysql_repo):
    """A stored log row is returned by ``get_logs`` and counted by ``get_total_logs``."""
    entry = {
        "decky": "decky-01", "service": "ssh", "event_type": "connect",
        "attacker_ip": "10.0.0.7", "raw_line": "connect from 10.0.0.7",
        "fields": json.dumps({"port": 22}), "msg": "conn",
    }
    await mysql_repo.add_log(entry)

    recent = await mysql_repo.get_logs(limit=10)
    assert any(row["attacker_ip"] == "10.0.0.7" for row in recent)

    total = await mysql_repo.get_total_logs()
    assert total >= 1
async def test_json_field_search(mysql_repo):
    """Searching ``username:root`` finds the log whose JSON fields hold that username."""
    entry = {
        "decky": "d1", "service": "ssh", "event_type": "connect",
        "attacker_ip": "1.2.3.4", "raw_line": "x",
        "fields": json.dumps({"username": "root"}), "msg": "",
    }
    await mysql_repo.add_log(entry)

    hits = await mysql_repo.get_logs(search="username:root")
    matching_ips = [hit["attacker_ip"] for hit in hits]
    assert any("1.2.3.4" == ip for ip in matching_ips)
async def test_histogram_buckets(mysql_repo):
    """Three logs one minute apart yield non-empty histogram buckets with time and count."""
    reference = datetime.now(timezone.utc).replace(microsecond=0)
    for minutes_ago in range(3):
        stamp = reference - timedelta(minutes=minutes_ago)
        await mysql_repo.add_log({
            "decky": "h", "service": "ssh", "event_type": "connect",
            "attacker_ip": "9.9.9.9",
            "raw_line": f"line {minutes_ago}", "fields": "{}", "msg": "",
            "timestamp": stamp.isoformat(),
        })

    buckets = await mysql_repo.get_log_histogram(interval_minutes=5)
    assert buckets, "expected at least one histogram bucket"
    for bucket in buckets:
        assert "time" in bucket and "count" in bucket
        assert bucket["count"] >= 1
async def test_bounty_roundtrip(mysql_repo):
    """A stored credentials bounty is returned by ``get_bounties``."""
    bounty = {
        "decky": "decky-01", "service": "ssh", "attacker_ip": "10.0.0.1",
        "bounty_type": "credentials",
        "payload": {"username": "root", "password": "toor"},
    }
    await mysql_repo.add_bounty(bounty)

    stored = await mysql_repo.get_bounties()
    kinds = [item["bounty_type"] for item in stored]
    assert any(kind == "credentials" for kind in kinds)
async def test_user_crud(mysql_repo):
    """Create a user, change its role, delete it, and verify each step by UUID lookup."""
    user_id = str(_uuid.uuid4())
    new_user = {
        "uuid": user_id, "username": "live_tester",
        "password_hash": "hashed", "role": "viewer", "must_change_password": True,
    }
    await mysql_repo.create_user(new_user)

    created = await mysql_repo.get_user_by_uuid(user_id)
    assert created and created["username"] == "live_tester"

    await mysql_repo.update_user_role(user_id, "admin")
    promoted = await mysql_repo.get_user_by_uuid(user_id)
    assert promoted["role"] == "admin"

    deleted = await mysql_repo.delete_user(user_id)
    assert deleted
    gone = await mysql_repo.get_user_by_uuid(user_id)
    assert gone is None
async def test_purge_clears_tables(mysql_repo):
    """After ``purge_logs_and_bounties`` the total log count is zero."""
    entry = {
        "decky": "p", "service": "ssh", "event_type": "connect",
        "attacker_ip": "1.1.1.1", "raw_line": "x", "fields": "{}", "msg": "",
    }
    await mysql_repo.add_log(entry)
    await mysql_repo.purge_logs_and_bounties()

    remaining = await mysql_repo.get_total_logs()
    assert remaining == 0
async def test_large_commands_blob_round_trips(mysql_repo):
    """Attacker.commands must handle >64 KiB (MEDIUMTEXT) — was 1406 errors on TEXT."""
    one_command = {
        "service": "ssh", "decky": "d", "command": "A" * 512,
        "timestamp": "2026-04-15T12:00:00+00:00",
    }
    # 500 copies of a 512-character command: ~250 KiB once serialized.
    big_commands = [dict(one_command) for _ in range(500)]

    ip = "8.8.8.8"
    seen_at = datetime.now(timezone.utc)
    attacker = {
        "ip": ip, "first_seen": seen_at, "last_seen": seen_at,
        "event_count": 0, "service_count": 0, "decky_count": 0,
        "commands": json.dumps(big_commands),
    }
    row_uuid = await mysql_repo.upsert_attacker(attacker)

    fetched = await mysql_repo.get_attacker_by_uuid(row_uuid)
    assert fetched is not None
    assert len(fetched["commands"]) == 500

View File

@@ -1,8 +1,12 @@
import pytest
import pymysql
from tests.live.conftest import assert_rfc5424
from tests.live.conftest import assert_rfc5424, _mysql_available
pytestmark = pytest.mark.skipif(
not _mysql_available(),
reason="MySQL not available on 127.0.0.1:3307"
)
@pytest.mark.live
class TestMySQLLive:

View File

@@ -55,4 +55,4 @@ class TestPOP3Live:
pass
lines += drain()
matched = assert_rfc5424(lines, service="pop3", event_type="auth")
assert "failed" in matched, f"Expected auth failure in log. Got:\n{matched!r}"
assert "failure" in matched, f"Expected auth failure in log. Got:\n{matched!r}"

View File

@@ -60,13 +60,18 @@ class TestPostgresLive:
def test_auth_hash_logged(self, live_service):
port, drain = live_service("postgres")
import psycopg2
# Real PG rejects before asking for a password when the requested
# db doesn't exist, and the honeypot faithfully mirrors that. So
# we must target an always-present database (``postgres`` is in
# _BASE_DBS) to get past startup and into the password-auth stage
# that this test is asserting on.
try:
psycopg2.connect(
host="127.0.0.1",
port=port,
user="root",
password="toor",
dbname="prod",
dbname="postgres",
connect_timeout=5,
)
except psycopg2.OperationalError:

View File

@@ -0,0 +1,508 @@
"""
Live service isolation tests.
Unlike tests/test_service_isolation.py (which mocks dependencies), these tests
run real workers against real (temporary) resources to verify graceful degradation
in conditions that actually occur on a host machine.
Dependency graph under test:
Collector → (Docker SDK, state file, log file)
Ingester → (Collector's JSON output, DB repo)
Attacker → (DB repo)
Sniffer → (MACVLAN interface, scapy, state file)
API → (DB init, all workers, Docker, health endpoint)
Run: pytest -m live tests/live/test_service_isolation_live.py -v
"""
import asyncio
import json
import os
import uuid as _uuid
from pathlib import Path
import httpx
import pytest
pytestmark = pytest.mark.skipif(
os.environ.get("CI") == "true",
reason="live tests run locally, CI environment not advanced enough to handle this."
)
# Must be set before any decnet import
os.environ.setdefault("DECNET_JWT_SECRET", "test-secret-key-at-least-32-chars-long!!")
os.environ.setdefault("DECNET_ADMIN_PASSWORD", "test-password-123")
os.environ["DECNET_CONTRACT_TEST"] = "true"
from decnet.collector.worker import ( # noqa: E402
log_collector_worker,
parse_rfc5424,
_load_service_container_names,
is_service_container,
)
from decnet.web.ingester import log_ingestion_worker # noqa: E402
from decnet.profiler.worker import ( # noqa: E402
attacker_profile_worker,
_WorkerState,
_incremental_update,
)
from decnet.sniffer.worker import sniffer_worker, _interface_exists # noqa: E402
from decnet.web.api import app, lifespan # noqa: E402
from decnet.web.dependencies import repo # noqa: E402
from decnet.web.db.models import User, Log # noqa: E402
from decnet.web.auth import get_password_hash # noqa: E402
from decnet.env import DECNET_ADMIN_USER, DECNET_ADMIN_PASSWORD # noqa: E402
from sqlmodel import SQLModel # noqa: E402
from sqlalchemy import select # noqa: E402
from sqlalchemy.ext.asyncio import ( # noqa: E402
AsyncSession,
async_sessionmaker,
create_async_engine,
)
from sqlalchemy.pool import StaticPool # noqa: E402
# ─── Shared fixtures ────────────────────────────────────────────────────────
@pytest.fixture(scope="module")
def event_loop():
    """Create a new asyncio loop shared by this module's tests; close it at teardown."""
    shared_loop = asyncio.new_event_loop()
    yield shared_loop
    shared_loop.close()
@pytest.fixture(scope="module", autouse=True)
async def live_db():
    """Real in-memory SQLite — shared across this module.

    Installs the engine and session factory on the shared ``repo``, creates
    all ``SQLModel`` tables, adds the admin user if it is missing, and
    disposes the engine when the module is done.
    """
    sqlite_engine = create_async_engine(
        "sqlite+aiosqlite:///:memory:",
        poolclass=StaticPool,
        connect_args={"check_same_thread": False},
    )
    new_session = async_sessionmaker(
        sqlite_engine, class_=AsyncSession, expire_on_commit=False
    )

    # The workers and the API under test reach the database through ``repo``.
    repo.engine = sqlite_engine
    repo.session_factory = new_session

    async with sqlite_engine.begin() as conn:
        await conn.run_sync(SQLModel.metadata.create_all)

    async with new_session() as session:
        by_admin_name = select(User).where(User.username == DECNET_ADMIN_USER)
        admin_row = (await session.execute(by_admin_name)).scalar_one_or_none()
        if not admin_row:
            seeded_admin = User(
                uuid=str(_uuid.uuid4()),
                username=DECNET_ADMIN_USER,
                password_hash=get_password_hash(DECNET_ADMIN_PASSWORD),
                role="admin",
                must_change_password=False,
            )
            session.add(seeded_admin)
            await session.commit()

    yield
    await sqlite_engine.dispose()
@pytest.fixture(scope="module")
async def live_client(live_db):
    """Yield an async HTTP client wired straight to ``app`` through ASGI transport."""
    asgi_transport = httpx.ASGITransport(app=app)
    client_cm = httpx.AsyncClient(transport=asgi_transport, base_url="http://test")
    async with client_cm as http_client:
        yield http_client
@pytest.fixture(scope="module")
async def token(live_client):
    """Authenticate as the configured admin and return the access token.

    If the login response has no ``access_token``, the fixture fails with the
    HTTP status and body rather than a bare ``KeyError``.
    """
    credentials = {"username": DECNET_ADMIN_USER, "password": DECNET_ADMIN_PASSWORD}
    resp = await live_client.post("/api/v1/auth/login", json=credentials)
    payload = resp.json()
    assert "access_token" in payload, (
        f"admin login returned no access_token (HTTP {resp.status_code}): {resp.text}"
    )
    return payload["access_token"]
# ─── Collector live isolation ────────────────────────────────────────────────
@pytest.mark.live
class TestCollectorLiveIsolation:
    """Real collector behaviour against the actual Docker daemon."""

    async def test_collector_finds_no_deckies_without_state(self, tmp_path):
        """With no deckies in state and no DECNET labels, the scan rejects
        every container.

        is_service_container has two acceptance paths:
        1. label-based (decnet.fleet.service / decnet.topology.service)
        2. name match against decnet-state.json

        With state empty AND labels absent, both paths must reject. We
        feed synthetic container objects (no real Docker call) so the
        result is independent of whatever fleet may already be running on
        the host — which would otherwise satisfy path (1).
        """
        import decnet.config as cfg
        from unittest.mock import MagicMock

        original_state = cfg.STATE_FILE
        try:
            cfg.STATE_FILE = tmp_path / "empty-state.json"
            unlabeled = MagicMock()
            unlabeled.name = "some-random-container"
            unlabeled.attrs = {"Config": {"Labels": {}}}
            unlabeled.labels = {}
            assert is_service_container(unlabeled) is False
        finally:
            cfg.STATE_FILE = original_state

    async def test_state_loader_returns_empty_without_state_file(self, tmp_path):
        """Real _load_service_container_names against no state file."""
        import decnet.config as cfg

        original = cfg.STATE_FILE
        try:
            # A path under pytest's per-test tmp_path is guaranteed not to
            # exist, unlike a fixed name in the shared /tmp directory.
            cfg.STATE_FILE = tmp_path / "nonexistent-decnet-state-live.json"
            result = _load_service_container_names()
            assert result == set()
        finally:
            cfg.STATE_FILE = original

    def test_rfc5424_parser_handles_real_formats(self):
        """Parser works on real log lines, not just test fixtures."""
        valid = '<134>1 2026-04-14T12:00:00Z decky-01 ssh - login_attempt [relay@55555 src_ip="10.0.0.1" username="root" password="toor"] Failed login'
        result = parse_rfc5424(valid)
        assert result is not None
        assert result["decky"] == "decky-01"
        assert result["service"] == "ssh"
        assert result["attacker_ip"] == "10.0.0.1"
        assert result["fields"]["username"] == "root"

        # Garbage must return None, not crash
        assert parse_rfc5424("random garbage") is None
        assert parse_rfc5424("") is None

    def test_container_filter_rejects_real_system_containers(self, tmp_path):
        """is_service_container must not match system containers."""
        import decnet.config as cfg

        original = cfg.STATE_FILE
        try:
            # Same reasoning as above: make "no state file" a certainty.
            cfg.STATE_FILE = tmp_path / "nonexistent-decnet-state-live.json"
            # With no state, nothing is a service container
            assert is_service_container("dockerd") is False
            assert is_service_container("portainer") is False
            assert is_service_container("kube-proxy") is False
        finally:
            cfg.STATE_FILE = original
# ─── Ingester live isolation ─────────────────────────────────────────────────
@pytest.mark.live
class TestIngesterLiveIsolation:
    """Real ingester against real DB and real filesystem.

    ``DECNET_INGEST_LOG_FILE`` is set through the ``monkeypatch`` fixture so
    that a value already present in the environment is restored after each
    test instead of being deleted.
    """

    @staticmethod
    async def _cancel_and_drain(task):
        """Cancel *task* and wait for it to finish, ignoring the cancellation.

        Any exception other than ``CancelledError`` raised by the task
        propagates to the caller, so a crashed worker still fails the test.
        """
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass

    async def test_ingester_waits_for_missing_log_file(self, tmp_path, monkeypatch):
        """Ingester must poll patiently when the log file doesn't exist yet."""
        log_base = str(tmp_path / "missing.log")
        monkeypatch.setenv("DECNET_INGEST_LOG_FILE", log_base)

        task = asyncio.create_task(log_ingestion_worker(repo))
        await asyncio.sleep(0.5)
        assert not task.done(), "Ingester should be waiting, not exited"
        await self._cancel_and_drain(task)

    async def test_ingester_processes_real_json_into_db(self, tmp_path, monkeypatch):
        """Write real JSON log lines → ingester inserts them into the real DB."""
        json_file = tmp_path / "ingest.json"
        log_base = str(tmp_path / "ingest.log")

        record = {
            "timestamp": "2026-04-14 12:00:00",
            "decky": "decky-live-01",
            "service": "ssh",
            "event_type": "login_attempt",
            "attacker_ip": "10.99.99.1",
            "fields": {"username": "root", "password": "toor"},
            "msg": "Failed login",
            "raw_line": '<134>1 2026-04-14T12:00:00Z decky-live-01 ssh - login_attempt [relay@55555 src_ip="10.99.99.1"] Failed login',
        }
        json_file.write_text(json.dumps(record) + "\n")
        monkeypatch.setenv("DECNET_INGEST_LOG_FILE", log_base)

        task = asyncio.create_task(log_ingestion_worker(repo))
        # Give ingester time to pick up the file and process it
        await asyncio.sleep(1.5)
        await self._cancel_and_drain(task)

        # Verify the record landed in the real DB
        total = await repo.get_total_logs()
        assert total >= 1

        logs = await repo.get_logs(limit=100, offset=0)
        matching = [entry for entry in logs if entry["attacker_ip"] == "10.99.99.1"]
        assert len(matching) >= 1
        assert matching[0]["service"] == "ssh"

    async def test_ingester_skips_malformed_lines_without_crashing(self, tmp_path, monkeypatch):
        """Ingester must skip bad JSON and keep going on good lines."""
        json_file = tmp_path / "mixed.json"
        log_base = str(tmp_path / "mixed.log")

        good_record = {
            "timestamp": "2026-04-14 13:00:00",
            "decky": "decky-live-02",
            "service": "http",
            "event_type": "request",
            "attacker_ip": "10.88.88.1",
            "fields": {},
            "msg": "",
            "raw_line": "<134>1 2026-04-14T13:00:00Z decky-live-02 http - request -",
        }
        # Two unparseable lines precede the single valid record.
        json_file.write_text(
            "not valid json\n"
            "{broken too\n"
            + json.dumps(good_record)
            + "\n"
        )
        monkeypatch.setenv("DECNET_INGEST_LOG_FILE", log_base)

        task = asyncio.create_task(log_ingestion_worker(repo))
        await asyncio.sleep(1.5)
        await self._cancel_and_drain(task)

        # The good record should have made it through
        logs = await repo.get_logs(limit=100, offset=0)
        matching = [entry for entry in logs if entry["attacker_ip"] == "10.88.88.1"]
        assert len(matching) >= 1

    async def test_ingester_exits_gracefully_without_env_var(self, monkeypatch):
        """Ingester must return immediately when DECNET_INGEST_LOG_FILE is unset."""
        monkeypatch.delenv("DECNET_INGEST_LOG_FILE", raising=False)
        # Bounded wait: a worker that fails to return makes this test fail
        # with a timeout instead of hanging the whole suite.
        await asyncio.wait_for(log_ingestion_worker(repo), timeout=5)
# ─── Attacker worker live isolation ──────────────────────────────────────────
@pytest.mark.live
class TestAttackerWorkerLiveIsolation:
    """Exercise the attacker worker's incremental update against the real DB."""

    async def test_attacker_worker_cold_starts_on_empty_db(self):
        """A first update from a fresh worker state must finish and mark it initialized."""
        worker_state = _WorkerState()
        await _incremental_update(repo, worker_state)
        assert worker_state.initialized is True

    async def test_attacker_worker_builds_profile_from_real_logs(self):
        """Logs already stored for one IP must produce an attacker record for it."""
        # Seed three events from a known attacker IP: two ssh, then one http.
        for minute in range(3):
            service = "ssh" if minute < 2 else "http"
            seeded_log = {
                "timestamp": f"2026-04-14 14:0{minute}:00",
                "decky": "decky-live-03",
                "service": service,
                "event_type": "login_attempt",
                "attacker_ip": "10.77.77.1",
                "fields": {"username": "admin"},
                "msg": "",
                "raw_line": f'<134>1 2026-04-14T14:0{minute}:00Z decky-live-03 {service} - login_attempt [relay@55555 src_ip="10.77.77.1" username="admin"]',
            }
            await repo.add_log(seeded_log)

        worker_state = _WorkerState()
        await _incremental_update(repo, worker_state)

        # The update is expected to have produced a record for the seeded IP.
        attackers = await repo.get_attackers(limit=100, offset=0, search="10.77.77.1")
        matching = [attacker for attacker in attackers if attacker["ip"] == "10.77.77.1"]
        assert len(matching) >= 1
        assert matching[0]["event_count"] >= 3

    async def test_attacker_worker_survives_cycle_with_no_new_logs(self):
        """A second update with nothing new must not crash or move the cursor backwards."""
        worker_state = _WorkerState()
        await _incremental_update(repo, worker_state)
        cursor_after_first = worker_state.last_log_id

        # Run again without adding any data in between.
        await _incremental_update(repo, worker_state)
        assert worker_state.last_log_id >= cursor_after_first  # unchanged or higher
# ─── Sniffer live isolation ──────────────────────────────────────────────────
@pytest.mark.live
class TestSnifferLiveIsolation:
    """Real sniffer against the actual host network stack."""

    async def test_sniffer_exits_cleanly_no_interface(self, tmp_path, monkeypatch):
        """Sniffer must exit gracefully when MACVLAN interface doesn't exist."""
        # monkeypatch restores any pre-existing value at teardown instead of
        # unconditionally deleting the variable.
        monkeypatch.setenv("DECNET_SNIFFER_IFACE", "decnet_fake_iface_xyz")
        # Should return without exception
        await sniffer_worker(str(tmp_path / "sniffer.log"))

    def test_interface_exists_check_works(self):
        """_interface_exists returns True for loopback, False for nonsense."""
        # The negative case needs no particular host setup, so check it first:
        # it must still run on hosts where the loopback check is skipped.
        assert _interface_exists("definitely_not_a_real_iface") is False

        if not os.path.exists("/sys/class/net/lo"):
            pytest.skip("loopback interface not found, probably in CI. passing...")
        assert _interface_exists("lo") is True

    def test_sniffer_engine_isolation_from_db(self):
        """SnifferEngine has zero DB dependency — works standalone."""
        from decnet.sniffer.fingerprint import SnifferEngine

        # Collect emitted lines in memory instead of writing them anywhere.
        written: list[str] = []
        engine = SnifferEngine(
            ip_to_decky={"192.168.1.10": "decky-01"},
            write_fn=written.append,
        )
        engine._log("decky-01", "tls_client_hello", src_ip="10.0.0.1", ja3="abc123")

        assert len(written) == 1
        assert "decky-01" in written[0]
        assert "10.0.0.1" in written[0]
# ─── API lifespan live isolation ─────────────────────────────────────────────
@pytest.mark.live
class TestApiLifespanLiveIsolation:
    """Exercise the API through ``live_client`` using a bearer ``token``."""

    async def test_api_serves_requests_in_contract_mode(
        self, live_client, token
    ):
        """Stats and health endpoints must both answer with workers disabled."""
        auth_headers = {"Authorization": f"Bearer {token}"}

        # Stats
        stats_resp = await live_client.get("/api/v1/stats", headers=auth_headers)
        assert stats_resp.status_code == 200

        # Health: either status is accepted, but the payload must list components.
        health_resp = await live_client.get("/api/v1/health", headers=auth_headers)
        assert health_resp.status_code in (200, 503)
        assert "components" in health_resp.json()

    async def test_health_reflects_real_db_state(self, live_client, token):
        """The health payload must report the database component as ok."""
        auth_headers = {"Authorization": f"Bearer {token}"}
        health_resp = await live_client.get("/api/v1/health", headers=auth_headers)
        components = health_resp.json()["components"]
        assert components["database"]["status"] == "ok"

    async def test_health_reports_workers_not_started(self, live_client, token):
        """Each background worker must be reported as failing and not started."""
        auth_headers = {"Authorization": f"Bearer {token}"}
        health_resp = await live_client.get("/api/v1/health", headers=auth_headers)
        components = health_resp.json()["components"]

        for worker_name in ("ingestion_worker", "collector_worker", "attacker_worker"):
            worker_health = components[worker_name]
            assert worker_health["status"] == "failing"
            assert "not started" in worker_health["detail"]
assert "not started" in data["components"][w]["detail"]
# ─── Cross-service cascade live tests ────────────────────────────────────────
@pytest.mark.live
class TestCascadeLiveIsolation:
    """Verify that real component failures do not cascade.

    Environment variables are set through ``monkeypatch`` and scratch files
    live under ``tmp_path`` so nothing leaks into later tests or into the
    shared ``/tmp`` directory.
    """

    async def test_ingester_survives_collector_never_writing(self, tmp_path, monkeypatch):
        """When the collector never writes output, ingester waits without crashing."""
        log_base = str(tmp_path / "no-collector.log")
        monkeypatch.setenv("DECNET_INGEST_LOG_FILE", log_base)

        task = asyncio.create_task(log_ingestion_worker(repo))
        await asyncio.sleep(0.5)
        assert not task.done(), "Ingester crashed instead of waiting"

        # Stop the still-waiting worker; only the cancellation is swallowed.
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass

    async def test_api_serves_during_worker_failure(self, live_client, token):
        """API must respond to requests even when all workers are dead."""
        # Verify multiple endpoints still work
        for endpoint in ("/api/v1/stats", "/api/v1/health", "/api/v1/logs"):
            resp = await live_client.get(
                endpoint,
                headers={"Authorization": f"Bearer {token}"},
            )
            assert resp.status_code != 500, f"{endpoint} returned 500"

    async def test_sniffer_failure_invisible_to_api(
        self, live_client, token, tmp_path, monkeypatch
    ):
        """Sniffer crash must not affect API responses."""
        # Force sniffer to fail by naming an interface that should not exist.
        monkeypatch.setenv("DECNET_SNIFFER_IFACE", "nonexistent_iface_xyz")
        # Per-test scratch path instead of a fixed, shared file under /tmp.
        await sniffer_worker(str(tmp_path / "sniffer-cascade.log"))

        # API should be completely unaffected
        resp = await live_client.get(
            "/api/v1/stats",
            headers={"Authorization": f"Bearer {token}"},
        )
        assert resp.status_code == 200

    async def test_attacker_worker_independent_of_ingester(self):
        """Attacker worker runs against real DB regardless of ingester state."""
        state = _WorkerState()
        # No ingester is started here; the update is handed the repository directly.
        await _incremental_update(repo, state)
        assert state.initialized is True