Native sshd and pam_unix lines route through rsyslog without the relay@55555 SD wrapper and without key=value pairs, so attacker_ip fell through to "Unknown". Add a prose-IP fallback to both parsers: anchored patterns (from/rhost/client/src) win first so we never pick the local listener in "Connection from X port Y on Z port 22", with a bare-IPv4 scan as the last resort.
841 lines
35 KiB
Python
841 lines
35 KiB
Python
"""Tests for the host-side Docker log collector."""
|
|
|
|
import json
|
|
import asyncio
|
|
import threading
|
|
import time
|
|
import pytest
|
|
from types import SimpleNamespace
|
|
from unittest.mock import patch, MagicMock
|
|
from decnet.collector import parse_rfc5424, is_service_container, is_service_event
|
|
from decnet.collector.worker import (
|
|
_stream_container,
|
|
_load_service_container_names,
|
|
_should_ingest,
|
|
_reset_rate_limiter,
|
|
log_collector_worker,
|
|
)
|
|
|
|
_KNOWN_NAMES = {"omega-decky-http", "omega-decky-smtp", "relay-decky-ftp"}
|
|
|
|
|
|
def _make_container(name="omega-decky-http"):
|
|
return SimpleNamespace(name=name)
|
|
|
|
|
|
class TestParseRfc5424:
|
|
def _make_line(self, fields_str="", msg=""):
|
|
sd = f"[relay@55555 {fields_str}]" if fields_str else "-"
|
|
suffix = f" {msg}" if msg else ""
|
|
return f"<134>1 2024-01-15T12:00:00+00:00 decky-01 http - request {sd}{suffix}"
|
|
|
|
def test_returns_none_for_non_decnet_line(self):
|
|
assert parse_rfc5424("not a syslog line") is None
|
|
|
|
def test_returns_none_for_empty_line(self):
|
|
assert parse_rfc5424("") is None
|
|
|
|
def test_parses_basic_fields(self):
|
|
line = self._make_line()
|
|
result = parse_rfc5424(line)
|
|
assert result is not None
|
|
assert result["decky"] == "decky-01"
|
|
assert result["service"] == "http"
|
|
assert result["event_type"] == "request"
|
|
|
|
def test_parses_structured_data_fields(self):
|
|
line = self._make_line('src_ip="1.2.3.4" method="GET" path="/login"')
|
|
result = parse_rfc5424(line)
|
|
assert result is not None
|
|
assert result["fields"]["src_ip"] == "1.2.3.4"
|
|
assert result["fields"]["method"] == "GET"
|
|
assert result["fields"]["path"] == "/login"
|
|
|
|
def test_extracts_attacker_ip_from_src_ip(self):
|
|
line = self._make_line('src_ip="10.0.0.5"')
|
|
result = parse_rfc5424(line)
|
|
assert result["attacker_ip"] == "10.0.0.5"
|
|
|
|
def test_extracts_attacker_ip_from_src(self):
|
|
line = self._make_line('src="10.0.0.5"')
|
|
result = parse_rfc5424(line)
|
|
assert result["attacker_ip"] == "10.0.0.5"
|
|
|
|
def test_extracts_attacker_ip_from_client_ip(self):
|
|
line = self._make_line('client_ip="10.0.0.7"')
|
|
result = parse_rfc5424(line)
|
|
assert result["attacker_ip"] == "10.0.0.7"
|
|
|
|
def test_extracts_attacker_ip_from_remote_ip(self):
|
|
line = self._make_line('remote_ip="10.0.0.8"')
|
|
result = parse_rfc5424(line)
|
|
assert result["attacker_ip"] == "10.0.0.8"
|
|
|
|
def test_extracts_attacker_ip_from_ip(self):
|
|
line = self._make_line('ip="10.0.0.9"')
|
|
result = parse_rfc5424(line)
|
|
assert result["attacker_ip"] == "10.0.0.9"
|
|
|
|
def test_attacker_ip_defaults_to_unknown(self):
|
|
line = self._make_line('user="admin"')
|
|
result = parse_rfc5424(line)
|
|
assert result["attacker_ip"] == "Unknown"
|
|
|
|
def test_parses_line_with_real_procid(self):
|
|
"""sshd/sudo log via native syslog, so rsyslog fills PROCID with the
|
|
real PID instead of NILVALUE. The parser must accept either form."""
|
|
line = (
|
|
"<38>1 2026-04-18T08:27:21.862365+00:00 omega-decky sshd 940 - - "
|
|
"Accepted password for root from 192.168.1.5 port 43210 ssh2"
|
|
)
|
|
result = parse_rfc5424(line)
|
|
assert result is not None
|
|
assert result["decky"] == "omega-decky"
|
|
assert result["service"] == "sshd"
|
|
assert "Accepted password" in result["msg"]
|
|
# Native sshd lines have no key=value; the prose fallback pulls
|
|
# the IP out of "from <ip>".
|
|
assert result["attacker_ip"] == "192.168.1.5"
|
|
|
|
def test_extracts_attacker_ip_from_sshd_prose(self):
|
|
"""sshd routed via rsyslog emits free prose with no SD block and no
|
|
key=value pairs. The parser must still find the remote IP."""
|
|
cases = [
|
|
(
|
|
"<38>1 2026-04-27T03:08:48+00:00 dmz-gateway sshd 940 - - "
|
|
"Failed password for root from 157.66.144.16 port 42772 ssh2",
|
|
"157.66.144.16",
|
|
),
|
|
(
|
|
"<38>1 2026-04-27T03:08:45+00:00 dmz-gateway sshd 940 - - "
|
|
"Connection from 157.66.144.16 port 42772 on 10.0.0.2 port 22 rdomain \"\"",
|
|
"157.66.144.16", # must beat the local listener 10.0.0.2
|
|
),
|
|
(
|
|
"<38>1 2026-04-27T03:08:49+00:00 dmz-gateway sshd 940 - - "
|
|
"Connection closed by authenticating user root 157.66.144.16 port 42772 [preauth]",
|
|
"157.66.144.16",
|
|
),
|
|
(
|
|
"<38>1 2026-04-27T03:08:46+00:00 dmz-gateway sshd 940 - - "
|
|
"pam_unix(sshd:auth): authentication failure; "
|
|
"logname= uid=0 euid=0 tty=ssh ruser= rhost=157.66.144.16 user=root",
|
|
"157.66.144.16",
|
|
),
|
|
]
|
|
for line, expected in cases:
|
|
result = parse_rfc5424(line)
|
|
assert result is not None, line
|
|
assert result["attacker_ip"] == expected, (line, result["attacker_ip"])
|
|
|
|
def test_extracts_attacker_ip_from_msg_body_kv(self):
|
|
"""SSH container's bash PROMPT_COMMAND uses `logger -t bash "CMD ... src=IP ..."`
|
|
which produces an RFC 5424 line with NILVALUE SD — the IP lives in the
|
|
free-form msg, not in SD params. The collector should still pick it up."""
|
|
line = (
|
|
"<134>1 2024-01-15T12:00:00+00:00 decky-01 bash - - - "
|
|
"CMD uid=0 user=root src=198.51.100.7 pwd=/root cmd=ls -la"
|
|
)
|
|
result = parse_rfc5424(line)
|
|
assert result is not None
|
|
assert result["attacker_ip"] == "198.51.100.7"
|
|
# `fields` stays empty — the frontend's parseEventBody renders kv
|
|
# pairs straight from msg; we don't want duplicate pills.
|
|
assert result["fields"] == {}
|
|
assert "CMD uid=0" in result["msg"]
|
|
|
|
def test_sd_ip_wins_over_msg_body(self):
|
|
"""If SD params carry an IP, the msg-body fallback must not overwrite it."""
|
|
line = (
|
|
'<134>1 2024-01-15T12:00:00+00:00 decky-01 ssh - login '
|
|
'[relay@55555 src_ip="1.2.3.4"] rogue src=9.9.9.9 entry'
|
|
)
|
|
result = parse_rfc5424(line)
|
|
assert result["attacker_ip"] == "1.2.3.4"
|
|
# SD wins; `src=` from msg isn't folded into fields (msg retains it).
|
|
assert result["fields"]["src_ip"] == "1.2.3.4"
|
|
assert "src" not in result["fields"]
|
|
|
|
def test_parses_msg(self):
|
|
line = self._make_line(msg="hello world")
|
|
result = parse_rfc5424(line)
|
|
assert result["msg"] == "hello world"
|
|
|
|
def test_nilvalue_sd_with_msg(self):
|
|
line = "<134>1 2024-01-15T12:00:00+00:00 decky-01 http - request - some message"
|
|
result = parse_rfc5424(line)
|
|
assert result is not None
|
|
assert result["msg"] == "some message"
|
|
assert result["fields"] == {}
|
|
|
|
def test_raw_line_preserved(self):
|
|
line = self._make_line('src_ip="1.2.3.4"')
|
|
result = parse_rfc5424(line)
|
|
assert result["raw_line"] == line
|
|
|
|
def test_timestamp_formatted(self):
|
|
line = self._make_line()
|
|
result = parse_rfc5424(line)
|
|
assert result["timestamp"] == "2024-01-15 12:00:00"
|
|
|
|
def test_unescapes_sd_values(self):
|
|
line = self._make_line(r'path="/foo\"bar"')
|
|
result = parse_rfc5424(line)
|
|
assert result["fields"]["path"] == '/foo"bar'
|
|
|
|
def test_result_json_serializable(self):
|
|
line = self._make_line('src_ip="1.2.3.4" username="admin" password="s3cr3t"')
|
|
result = parse_rfc5424(line)
|
|
# Should not raise
|
|
json.dumps(result)
|
|
|
|
def test_invalid_timestamp_preserved_as_is(self):
|
|
line = "<134>1 not-a-date decky-01 http - request -"
|
|
result = parse_rfc5424(line)
|
|
assert result is not None
|
|
assert result["timestamp"] == "not-a-date"
|
|
|
|
def test_sd_rest_is_plain_text(self):
|
|
# When SD starts with neither '-' nor '[', treat as msg
|
|
line = "<134>1 2024-01-15T12:00:00+00:00 decky-01 http - request hello world"
|
|
result = parse_rfc5424(line)
|
|
assert result is not None
|
|
assert result["msg"] == "hello world"
|
|
|
|
def test_sd_with_msg_after_bracket(self):
|
|
line = '<134>1 2024-01-15T12:00:00+00:00 decky-01 http - request [relay@55555 src_ip="1.2.3.4"] login attempt'
|
|
result = parse_rfc5424(line)
|
|
assert result is not None
|
|
assert result["fields"]["src_ip"] == "1.2.3.4"
|
|
assert result["msg"] == "login attempt"
|
|
|
|
|
|
class TestIsServiceContainer:
|
|
def test_known_container_returns_true(self):
|
|
with patch("decnet.collector.worker._load_service_container_names", return_value=_KNOWN_NAMES):
|
|
assert is_service_container(_make_container("omega-decky-http")) is True
|
|
assert is_service_container(_make_container("omega-decky-smtp")) is True
|
|
assert is_service_container(_make_container("relay-decky-ftp")) is True
|
|
|
|
def test_base_container_returns_false(self):
|
|
with patch("decnet.collector.worker._load_service_container_names", return_value=_KNOWN_NAMES):
|
|
assert is_service_container(_make_container("omega-decky")) is False
|
|
|
|
def test_unrelated_container_returns_false(self):
|
|
with patch("decnet.collector.worker._load_service_container_names", return_value=_KNOWN_NAMES):
|
|
assert is_service_container(_make_container("nginx")) is False
|
|
|
|
def test_strips_leading_slash(self):
|
|
with patch("decnet.collector.worker._load_service_container_names", return_value=_KNOWN_NAMES):
|
|
assert is_service_container(_make_container("/omega-decky-http")) is True
|
|
assert is_service_container(_make_container("/omega-decky")) is False
|
|
|
|
def test_no_state_returns_false(self):
|
|
with patch("decnet.collector.worker._load_service_container_names", return_value=set()):
|
|
assert is_service_container(_make_container("omega-decky-http")) is False
|
|
|
|
def test_string_argument(self):
|
|
with patch("decnet.collector.worker._load_service_container_names", return_value=_KNOWN_NAMES):
|
|
assert is_service_container("omega-decky-http") is True
|
|
assert is_service_container("/omega-decky-http") is True
|
|
assert is_service_container("nginx") is False
|
|
|
|
|
|
class TestIsServiceEvent:
|
|
def test_known_service_event_returns_true(self):
|
|
with patch("decnet.collector.worker._load_service_container_names", return_value=_KNOWN_NAMES):
|
|
assert is_service_event({"name": "omega-decky-smtp"}) is True
|
|
|
|
def test_base_event_returns_false(self):
|
|
with patch("decnet.collector.worker._load_service_container_names", return_value=_KNOWN_NAMES):
|
|
assert is_service_event({"name": "omega-decky"}) is False
|
|
|
|
def test_unrelated_event_returns_false(self):
|
|
with patch("decnet.collector.worker._load_service_container_names", return_value=_KNOWN_NAMES):
|
|
assert is_service_event({"name": "nginx"}) is False
|
|
|
|
def test_no_state_returns_false(self):
|
|
with patch("decnet.collector.worker._load_service_container_names", return_value=set()):
|
|
assert is_service_event({"name": "omega-decky-smtp"}) is False
|
|
|
|
def test_strips_leading_slash(self):
|
|
with patch("decnet.collector.worker._load_service_container_names", return_value=_KNOWN_NAMES):
|
|
assert is_service_event({"name": "/omega-decky-smtp"}) is True
|
|
|
|
def test_empty_name(self):
|
|
with patch("decnet.collector.worker._load_service_container_names", return_value=_KNOWN_NAMES):
|
|
assert is_service_event({"name": ""}) is False
|
|
assert is_service_event({}) is False
|
|
|
|
|
|
class TestTopologyLabelDiscovery:
|
|
"""MazeNET topology containers aren't in decnet-state.json — the
|
|
collector discovers them via compose-time labels instead."""
|
|
|
|
def _labelled(self, name: str, labels: dict):
|
|
return SimpleNamespace(
|
|
name=name,
|
|
attrs={"Config": {"Labels": labels}},
|
|
labels=labels,
|
|
)
|
|
|
|
def test_topology_labelled_container_matches(self):
|
|
"""Unknown name + decnet.topology.service=true label → True."""
|
|
with patch("decnet.collector.worker._load_service_container_names", return_value=set()):
|
|
c = self._labelled(
|
|
"decky-2966-ssh",
|
|
{"decnet.topology.service": "true", "decnet.topology.id": "abc"},
|
|
)
|
|
assert is_service_container(c) is True
|
|
|
|
def test_base_container_label_does_not_match(self):
|
|
"""Base containers carry decnet.topology.role=base but NOT the
|
|
service marker — collector must ignore them or we double-capture
|
|
the sshd auth stream from both the base and the service share."""
|
|
with patch("decnet.collector.worker._load_service_container_names", return_value=set()):
|
|
c = self._labelled(
|
|
"decnet_t_af22dae8_decky-2966",
|
|
{"decnet.topology.role": "base", "decnet.topology.id": "abc"},
|
|
)
|
|
assert is_service_container(c) is False
|
|
|
|
def test_unrelated_container_with_labels_does_not_match(self):
|
|
with patch("decnet.collector.worker._load_service_container_names", return_value=set()):
|
|
c = self._labelled("portainer", {"com.docker.compose.project": "portainer"})
|
|
assert is_service_container(c) is False
|
|
|
|
def test_topology_event_matches_via_label(self):
|
|
"""Docker start events flatten labels alongside 'name' in attrs —
|
|
is_service_event must detect that shape."""
|
|
with patch("decnet.collector.worker._load_service_container_names", return_value=set()):
|
|
attrs = {
|
|
"name": "decky-2966-ssh",
|
|
"decnet.topology.service": "true",
|
|
"decnet.topology.id": "abc",
|
|
}
|
|
assert is_service_event(attrs) is True
|
|
|
|
def test_fleet_and_topology_coexist(self):
|
|
"""Fleet match wins when the name is in state; topology label
|
|
catches containers that aren't."""
|
|
with patch("decnet.collector.worker._load_service_container_names", return_value=_KNOWN_NAMES):
|
|
fleet_c = _make_container("omega-decky-http")
|
|
topo_c = self._labelled(
|
|
"decky-2966-ssh",
|
|
{"decnet.topology.service": "true"},
|
|
)
|
|
assert is_service_container(fleet_c) is True
|
|
assert is_service_container(topo_c) is True
|
|
|
|
def test_stub_without_attrs_still_works_via_labels(self):
|
|
"""Older test stubs use SimpleNamespace(name=…) with no .attrs —
|
|
falling back to .labels should still evaluate."""
|
|
with patch("decnet.collector.worker._load_service_container_names", return_value=set()):
|
|
c = SimpleNamespace(
|
|
name="decky-2966-ssh",
|
|
labels={"decnet.topology.service": "true"},
|
|
)
|
|
assert is_service_container(c) is True
|
|
|
|
|
|
class TestFleetLabelDiscovery:
|
|
"""Fleet (legacy) containers stamped with ``decnet.fleet.service=true``
|
|
by ``decnet/composer.py`` must be picked up by the events watcher even
|
|
when ``decnet-state.json`` hasn't been refreshed yet — that's the race
|
|
that previously caused freshly-deployed containers to be silently
|
|
ignored."""
|
|
|
|
def _labelled(self, name: str, labels: dict):
|
|
return SimpleNamespace(
|
|
name=name,
|
|
attrs={"Config": {"Labels": labels}},
|
|
labels=labels,
|
|
)
|
|
|
|
def test_fleet_labelled_container_matches_without_state(self):
|
|
with patch("decnet.collector.worker._load_service_container_names", return_value=set()):
|
|
c = self._labelled(
|
|
"omega-decky-ssh",
|
|
{"decnet.fleet.service": "true", "decnet.fleet.decky": "omega-decky"},
|
|
)
|
|
assert is_service_container(c) is True
|
|
|
|
def test_fleet_labelled_event_matches_without_state(self):
|
|
with patch("decnet.collector.worker._load_service_container_names", return_value=set()):
|
|
attrs = {
|
|
"name": "omega-decky-ssh",
|
|
"decnet.fleet.service": "true",
|
|
"decnet.fleet.decky": "omega-decky",
|
|
}
|
|
assert is_service_event(attrs) is True
|
|
|
|
def test_unlabelled_event_falls_back_to_state(self):
|
|
"""Containers built before this label landed still match by name."""
|
|
with patch("decnet.collector.worker._load_service_container_names", return_value=_KNOWN_NAMES):
|
|
assert is_service_event({"name": "omega-decky-http"}) is True
|
|
|
|
def test_unrelated_label_does_not_match(self):
|
|
with patch("decnet.collector.worker._load_service_container_names", return_value=set()):
|
|
c = self._labelled(
|
|
"redis",
|
|
{"com.docker.compose.project": "redis", "decnet.fleet.service": "false"},
|
|
)
|
|
assert is_service_container(c) is False
|
|
assert is_service_event(
|
|
{"name": "redis", "decnet.fleet.service": "false"}
|
|
) is False
|
|
|
|
|
|
class TestLoadServiceContainerNames:
|
|
def test_with_valid_state(self, tmp_path, monkeypatch):
|
|
import decnet.config
|
|
from decnet.config import DeckyConfig, DecnetConfig
|
|
state_file = tmp_path / "state.json"
|
|
config = DecnetConfig(
|
|
mode="unihost", interface="eth0", subnet="192.168.1.0/24",
|
|
gateway="192.168.1.1",
|
|
deckies=[
|
|
DeckyConfig(name="decky-01", ip="192.168.1.10", services=["ssh", "http"],
|
|
distro="debian", base_image="debian", hostname="test",
|
|
build_base="debian:bookworm-slim"),
|
|
],
|
|
)
|
|
state_file.write_text(json.dumps({
|
|
"config": config.model_dump(),
|
|
"compose_path": "test.yml",
|
|
}))
|
|
monkeypatch.setattr(decnet.config, "STATE_FILE", state_file)
|
|
names = _load_service_container_names()
|
|
assert names == {"decky-01-ssh", "decky-01-http"}
|
|
|
|
def test_no_state(self, tmp_path, monkeypatch):
|
|
import decnet.config
|
|
state_file = tmp_path / "nonexistent.json"
|
|
monkeypatch.setattr(decnet.config, "STATE_FILE", state_file)
|
|
names = _load_service_container_names()
|
|
assert names == set()
|
|
|
|
|
|
class TestStreamContainer:
|
|
def test_streams_rfc5424_lines(self, tmp_path):
|
|
log_path = tmp_path / "test.log"
|
|
json_path = tmp_path / "test.json"
|
|
|
|
mock_container = MagicMock()
|
|
rfc_line = '<134>1 2024-01-15T12:00:00+00:00 decky-01 ssh - auth [relay@55555 src_ip="1.2.3.4"] login\n'
|
|
mock_container.logs.return_value = [rfc_line.encode("utf-8")]
|
|
|
|
mock_client = MagicMock()
|
|
mock_client.containers.get.return_value = mock_container
|
|
|
|
with patch("docker.from_env", return_value=mock_client):
|
|
_stream_container("test-id", log_path, json_path)
|
|
|
|
assert log_path.exists()
|
|
log_content = log_path.read_text()
|
|
assert "decky-01" in log_content
|
|
|
|
assert json_path.exists()
|
|
json_content = json_path.read_text().strip()
|
|
parsed = json.loads(json_content)
|
|
assert parsed["service"] == "ssh"
|
|
|
|
def test_handles_non_rfc5424_lines(self, tmp_path):
|
|
log_path = tmp_path / "test.log"
|
|
json_path = tmp_path / "test.json"
|
|
|
|
mock_container = MagicMock()
|
|
mock_container.logs.return_value = [b"just a plain log line\n"]
|
|
|
|
mock_client = MagicMock()
|
|
mock_client.containers.get.return_value = mock_container
|
|
|
|
with patch("docker.from_env", return_value=mock_client):
|
|
_stream_container("test-id", log_path, json_path)
|
|
|
|
assert log_path.exists()
|
|
# JSON file is only created when RFC5424 lines are parsed — not for plain lines.
|
|
assert not json_path.exists() or json_path.read_text() == ""
|
|
|
|
def test_handles_docker_error(self, tmp_path):
|
|
log_path = tmp_path / "test.log"
|
|
json_path = tmp_path / "test.json"
|
|
|
|
mock_client = MagicMock()
|
|
mock_client.containers.get.side_effect = Exception("Container not found")
|
|
|
|
with patch("docker.from_env", return_value=mock_client):
|
|
_stream_container("bad-id", log_path, json_path)
|
|
|
|
# Should not raise, just log the error
|
|
|
|
def test_skips_empty_lines(self, tmp_path):
|
|
log_path = tmp_path / "test.log"
|
|
json_path = tmp_path / "test.json"
|
|
|
|
mock_container = MagicMock()
|
|
mock_container.logs.return_value = [b"\n\n\n"]
|
|
|
|
mock_client = MagicMock()
|
|
mock_client.containers.get.return_value = mock_container
|
|
|
|
with patch("docker.from_env", return_value=mock_client):
|
|
_stream_container("test-id", log_path, json_path)
|
|
|
|
# All lines were empty — no file is created (lazy open).
|
|
assert not log_path.exists() or log_path.read_text() == ""
|
|
|
|
def test_log_file_recreated_after_deletion(self, tmp_path):
|
|
log_path = tmp_path / "test.log"
|
|
json_path = tmp_path / "test.json"
|
|
|
|
line1 = b"first line\n"
|
|
line2 = b"second line\n"
|
|
|
|
def _chunks():
|
|
yield line1
|
|
log_path.unlink() # simulate deletion between writes
|
|
yield line2
|
|
|
|
mock_container = MagicMock()
|
|
mock_container.logs.return_value = _chunks()
|
|
mock_client = MagicMock()
|
|
mock_client.containers.get.return_value = mock_container
|
|
|
|
with patch("docker.from_env", return_value=mock_client):
|
|
_stream_container("test-id", log_path, json_path)
|
|
|
|
assert log_path.exists(), "log file must be recreated after deletion"
|
|
content = log_path.read_text()
|
|
assert "second line" in content
|
|
|
|
def test_json_file_recreated_after_deletion(self, tmp_path):
|
|
log_path = tmp_path / "test.log"
|
|
json_path = tmp_path / "test.json"
|
|
|
|
rfc_line = (
|
|
'<134>1 2024-01-15T12:00:00+00:00 decky-01 ssh - auth '
|
|
'[relay@55555 src_ip="1.2.3.4"] login\n'
|
|
)
|
|
encoded = rfc_line.encode("utf-8")
|
|
|
|
def _chunks():
|
|
yield encoded
|
|
# Remove the json file between writes; the second RFC line should
|
|
# trigger a fresh file open.
|
|
if json_path.exists():
|
|
json_path.unlink()
|
|
yield encoded
|
|
|
|
mock_container = MagicMock()
|
|
mock_container.logs.return_value = _chunks()
|
|
mock_client = MagicMock()
|
|
mock_client.containers.get.return_value = mock_container
|
|
|
|
with patch("docker.from_env", return_value=mock_client):
|
|
_stream_container("test-id", log_path, json_path)
|
|
|
|
assert json_path.exists(), "json file must be recreated after deletion"
|
|
lines = [l for l in json_path.read_text().splitlines() if l.strip()]
|
|
assert len(lines) >= 1
|
|
|
|
def test_rotated_file_detected(self, tmp_path):
|
|
"""Simulate logrotate: rename old file away, new write should go to a fresh file."""
|
|
log_path = tmp_path / "test.log"
|
|
json_path = tmp_path / "test.json"
|
|
|
|
line1 = b"before rotation\n"
|
|
line2 = b"after rotation\n"
|
|
rotated = tmp_path / "test.log.1"
|
|
|
|
def _chunks():
|
|
yield line1
|
|
log_path.rename(rotated) # logrotate renames old file
|
|
yield line2
|
|
|
|
mock_container = MagicMock()
|
|
mock_container.logs.return_value = _chunks()
|
|
mock_client = MagicMock()
|
|
mock_client.containers.get.return_value = mock_container
|
|
|
|
with patch("docker.from_env", return_value=mock_client):
|
|
_stream_container("test-id", log_path, json_path)
|
|
|
|
assert log_path.exists(), "new log file must be created after rotation"
|
|
assert "after rotation" in log_path.read_text()
|
|
assert "before rotation" in rotated.read_text()
|
|
|
|
|
|
class TestIngestRateLimiter:
|
|
def setup_method(self):
|
|
_reset_rate_limiter()
|
|
|
|
def _event(self, event_type="connect", attacker_ip="1.2.3.4",
|
|
decky="decky-01", service="ssh"):
|
|
return {
|
|
"event_type": event_type,
|
|
"attacker_ip": attacker_ip,
|
|
"decky": decky,
|
|
"service": service,
|
|
}
|
|
|
|
def test_non_limited_event_types_always_pass(self):
|
|
# login_attempt / request / etc. carry distinguishing payload — never deduped.
|
|
for _ in range(5):
|
|
assert _should_ingest(self._event(event_type="login_attempt")) is True
|
|
assert _should_ingest(self._event(event_type="request")) is True
|
|
|
|
def test_first_connect_passes(self):
|
|
assert _should_ingest(self._event()) is True
|
|
|
|
def test_duplicate_connect_within_window_is_dropped(self):
|
|
assert _should_ingest(self._event()) is True
|
|
assert _should_ingest(self._event()) is False
|
|
assert _should_ingest(self._event()) is False
|
|
|
|
def test_different_attackers_tracked_independently(self):
|
|
assert _should_ingest(self._event(attacker_ip="1.1.1.1")) is True
|
|
assert _should_ingest(self._event(attacker_ip="2.2.2.2")) is True
|
|
|
|
def test_different_deckies_tracked_independently(self):
|
|
assert _should_ingest(self._event(decky="a")) is True
|
|
assert _should_ingest(self._event(decky="b")) is True
|
|
|
|
def test_different_services_tracked_independently(self):
|
|
assert _should_ingest(self._event(service="ssh")) is True
|
|
assert _should_ingest(self._event(service="http")) is True
|
|
|
|
def test_disconnect_and_connect_tracked_independently(self):
|
|
assert _should_ingest(self._event(event_type="connect")) is True
|
|
assert _should_ingest(self._event(event_type="disconnect")) is True
|
|
|
|
def test_window_expiry_allows_next_event(self, monkeypatch):
|
|
import decnet.collector.worker as worker
|
|
t = [1000.0]
|
|
monkeypatch.setattr(worker.time, "monotonic", lambda: t[0])
|
|
assert _should_ingest(self._event()) is True
|
|
assert _should_ingest(self._event()) is False
|
|
# Advance past 1-second window.
|
|
t[0] += 1.5
|
|
assert _should_ingest(self._event()) is True
|
|
|
|
def test_window_zero_disables_limiter(self, monkeypatch):
|
|
import decnet.collector.worker as worker
|
|
monkeypatch.setattr(worker, "_RL_WINDOW_SEC", 0.0)
|
|
for _ in range(10):
|
|
assert _should_ingest(self._event()) is True
|
|
|
|
def test_raw_log_gets_all_lines_json_dedupes(self, tmp_path):
|
|
"""End-to-end: duplicates hit the .log file but NOT the .json stream."""
|
|
log_path = tmp_path / "test.log"
|
|
json_path = tmp_path / "test.json"
|
|
line = (
|
|
'<134>1 2024-01-15T12:00:00+00:00 decky-01 ssh - connect '
|
|
'[relay@55555 src_ip="1.2.3.4"]\n'
|
|
)
|
|
payload = (line * 5).encode("utf-8")
|
|
|
|
mock_container = MagicMock()
|
|
mock_container.logs.return_value = [payload]
|
|
mock_client = MagicMock()
|
|
mock_client.containers.get.return_value = mock_container
|
|
|
|
with patch("docker.from_env", return_value=mock_client):
|
|
_stream_container("test-id", log_path, json_path)
|
|
|
|
# Raw log: all 5 lines preserved (forensic fidelity).
|
|
assert log_path.read_text().count("\n") == 5
|
|
# JSON ingest: only the first one written (4 dropped by the limiter).
|
|
json_lines = [l for l in json_path.read_text().splitlines() if l.strip()]
|
|
assert len(json_lines) == 1
|
|
|
|
def test_gc_trims_oversized_map(self, monkeypatch):
|
|
import decnet.collector.worker as worker
|
|
# Seed the map with stale entries, then push past the cap.
|
|
monkeypatch.setattr(worker, "_RL_MAX_ENTRIES", 10)
|
|
t = [1000.0]
|
|
monkeypatch.setattr(worker.time, "monotonic", lambda: t[0])
|
|
for i in range(9):
|
|
assert _should_ingest(self._event(attacker_ip=f"10.0.0.{i}")) is True
|
|
# Jump well past 60 windows to make prior entries stale.
|
|
t[0] += 1000.0
|
|
# This insertion pushes len to 10; GC triggers on >10 so stays.
|
|
assert _should_ingest(self._event(attacker_ip="10.0.0.99")) is True
|
|
assert _should_ingest(self._event(attacker_ip="10.0.0.100")) is True
|
|
# After the map exceeds the cap, stale entries must be purged.
|
|
assert len(worker._rl_last) < 10
|
|
|
|
|
|
class TestLogCollectorWorker:
|
|
@pytest.mark.asyncio
|
|
async def test_worker_initial_discovery(self, tmp_path):
|
|
log_file = str(tmp_path / "decnet.log")
|
|
|
|
mock_container = MagicMock()
|
|
mock_container.id = "c1"
|
|
mock_container.name = "/s-1"
|
|
# Mock labels to satisfy is_service_container
|
|
mock_container.labels = {"com.docker.compose.project": "decnet"}
|
|
|
|
mock_client = MagicMock()
|
|
mock_client.containers.list.return_value = [mock_container]
|
|
# Make events return an empty generator/iterator immediately
|
|
mock_client.events.return_value = iter([])
|
|
|
|
with patch("docker.from_env", return_value=mock_client), \
|
|
patch("decnet.collector.worker.is_service_container", return_value=True):
|
|
# Run with a short task timeout because it loops
|
|
try:
|
|
await asyncio.wait_for(log_collector_worker(log_file), timeout=0.1)
|
|
except (asyncio.TimeoutError, StopIteration):
|
|
pass
|
|
|
|
# Should have tried to list and watch events
|
|
mock_client.containers.list.assert_called_once()
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_worker_handles_events(self, tmp_path):
|
|
log_file = str(tmp_path / "decnet.log")
|
|
|
|
mock_client = MagicMock()
|
|
mock_client.containers.list.return_value = []
|
|
|
|
event = {
|
|
"id": "c2",
|
|
"Actor": {"Attributes": {"name": "s-2", "com.docker.compose.project": "decnet"}}
|
|
}
|
|
mock_client.events.return_value = iter([event])
|
|
|
|
with patch("docker.from_env", return_value=mock_client), \
|
|
patch("decnet.collector.worker.is_service_event", return_value=True):
|
|
try:
|
|
await asyncio.wait_for(log_collector_worker(log_file), timeout=0.1)
|
|
except (asyncio.TimeoutError, StopIteration):
|
|
pass
|
|
|
|
mock_client.events.assert_called_once()
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_worker_exception_handling(self, tmp_path):
|
|
log_file = str(tmp_path / "decnet.log")
|
|
mock_client = MagicMock()
|
|
mock_client.containers.list.side_effect = Exception("Docker down")
|
|
|
|
with patch("docker.from_env", return_value=mock_client):
|
|
# Should not raise
|
|
await log_collector_worker(log_file)
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_event_watcher_retries_on_stream_break(self, tmp_path, monkeypatch):
|
|
"""A docker stream-decode hiccup must not silently end the
|
|
watcher: today the executor task would return cleanly and
|
|
future container starts would be dropped until an operator
|
|
restarted the unit. The retry loop is what keeps the collector
|
|
honest across daemon reloads."""
|
|
log_file = str(tmp_path / "decnet.log")
|
|
|
|
valid_event = {
|
|
"id": "c-resilient",
|
|
"Actor": {"Attributes": {"name": "resilient-svc"}},
|
|
}
|
|
|
|
# Patch time.sleep inside the worker so the retry's backoff
|
|
# doesn't actually wait — keeps the test under the budget.
|
|
monkeypatch.setattr("decnet.collector.worker.time.sleep", lambda *_: None)
|
|
|
|
# Sequence: raise (transient error), then SystemExit to break
|
|
# out of the while-True. SystemExit is BaseException-derived so
|
|
# the broad ``except Exception`` in production won't catch it —
|
|
# the watcher thread exits cleanly and the worker finishes.
|
|
# We don't try to assert _spawn was called: the dispatch path
|
|
# uses ``loop.call_soon_threadsafe(_spawn, ...)`` and patching
|
|
# the abstract loop method doesn't reach the concrete loop.
|
|
# The retry contract is fully verified by counting reconnect
|
|
# attempts.
|
|
events_calls = {"n": 0}
|
|
|
|
def _events(**_kw):
|
|
events_calls["n"] += 1
|
|
if events_calls["n"] == 1:
|
|
raise RuntimeError("stream decode error")
|
|
# Second call: clean exit. Watcher's retry means call #2
|
|
# happens at all; without retry, the RuntimeError would
|
|
# propagate out of the executor and the watcher would
|
|
# never call events() again.
|
|
return iter([])
|
|
|
|
mock_client = MagicMock()
|
|
mock_client.containers.list.return_value = []
|
|
mock_client.events.side_effect = _events
|
|
|
|
# del valid_event — unused now that we dropped the spawn assertion
|
|
del valid_event
|
|
|
|
with patch("docker.from_env", return_value=mock_client), \
|
|
patch("decnet.collector.worker.is_service_event", return_value=True):
|
|
try:
|
|
await asyncio.wait_for(log_collector_worker(log_file), timeout=2.0)
|
|
except (asyncio.TimeoutError, StopIteration, SystemExit):
|
|
pass
|
|
|
|
assert events_calls["n"] >= 2, (
|
|
f"expected >=2 events() calls (one failure + one reconnect) "
|
|
f"proving the retry loop, got {events_calls['n']}"
|
|
)
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_reconciler_picks_up_missed_container(self, tmp_path, monkeypatch):
|
|
"""Even if the event watcher wedges, the reconciler must catch
|
|
any service container that's already running. Simulates the
|
|
first-VPS-deploy bug: events() never yields, but a service
|
|
container exists in containers.list() — the worker had to be
|
|
restarted to pick it up. Now the reconciler does it
|
|
within RECONCILE_INTERVAL_S."""
|
|
log_file = str(tmp_path / "decnet.log")
|
|
monkeypatch.setenv("DECNET_COLLECTOR_RECONCILE_S", "0.05")
|
|
|
|
missed_container = MagicMock()
|
|
missed_container.id = "c-missed"
|
|
missed_container.name = "/missed-svc"
|
|
|
|
list_calls = {"n": 0}
|
|
|
|
def _list():
|
|
list_calls["n"] += 1
|
|
# First call (initial scan): empty. Subsequent (reconciler): one container.
|
|
if list_calls["n"] == 1:
|
|
return []
|
|
return [missed_container]
|
|
|
|
mock_client = MagicMock()
|
|
mock_client.containers.list.side_effect = _list
|
|
|
|
# First events() call raises a transient error that the
|
|
# watcher catches → triggers its real 1s backoff sleep. During
|
|
# that sleep the asyncio loop runs and the reconciler (ticking
|
|
# every 0.05s) gets ~20 chances to discover ``c-missed``.
|
|
# Second call returns an empty iterator → watcher exits
|
|
# cleanly so the test can unwind without a lingering thread.
|
|
events_calls = {"n": 0}
|
|
|
|
def _events_seq(**_kw):
|
|
events_calls["n"] += 1
|
|
if events_calls["n"] == 1:
|
|
raise RuntimeError("test: trigger backoff so reconciler can run")
|
|
return iter([])
|
|
|
|
mock_client.events.side_effect = _events_seq
|
|
|
|
with patch("docker.from_env", return_value=mock_client), \
|
|
patch("decnet.collector.worker.is_service_container", return_value=True):
|
|
try:
|
|
await asyncio.wait_for(log_collector_worker(log_file), timeout=2.0)
|
|
except (asyncio.TimeoutError, StopIteration, SystemExit):
|
|
pass
|
|
|
|
assert list_calls["n"] >= 2, (
|
|
"reconciler should have run at least once after the initial scan; "
|
|
f"got {list_calls['n']} calls to containers.list()"
|
|
)
|
|
|