merge: testing → main (reconcile 2-week divergence)

This commit is contained in:
2026-04-28 18:36:00 -04:00
parent 499836c9e4
commit 862e4dbb31
1235 changed files with 160255 additions and 7996 deletions

View File

View File

@@ -0,0 +1,84 @@
"""
Tests for bounty deduplication.
Identical (bounty_type, attacker_ip, payload) tuples must be dropped so
aggressive scanners cannot saturate the bounty table.
"""
import pytest
from decnet.web.db.factory import get_repository
@pytest.fixture
async def repo(tmp_path):
    """Provide an initialised repository backed by a per-test ``test.db`` file."""
    db_file = tmp_path / "test.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
# Baseline bounty record. The tests in this module pass shallow copies of it
# to ``add_bounty`` and override one key at a time to vary a single field.
_BASE = {
    "decky": "decky-01",
    "service": "ssh",
    "attacker_ip": "10.0.0.1",
    "bounty_type": "credential",
    "payload": {"username": "admin", "password": "password"},
}
@pytest.mark.anyio
async def test_duplicate_dropped(repo):
    """Adding the same bounty twice leaves exactly one stored bounty."""
    for _attempt in range(2):
        await repo.add_bounty({**_BASE})
    stored = await repo.get_bounties()
    assert len(stored) == 1
@pytest.mark.anyio
async def test_different_ip_not_deduped(repo):
    """Bounties that differ only in ``attacker_ip`` are both kept."""
    baseline = dict(_BASE)
    other_ip = dict(_BASE, attacker_ip="10.0.0.2")
    await repo.add_bounty(baseline)
    await repo.add_bounty(other_ip)
    stored = await repo.get_bounties()
    assert len(stored) == 2
@pytest.mark.anyio
async def test_different_type_not_deduped(repo):
    """Bounties that differ only in ``bounty_type`` are both kept."""
    baseline = dict(_BASE)
    other_type = dict(_BASE, bounty_type="fingerprint")
    await repo.add_bounty(baseline)
    await repo.add_bounty(other_type)
    stored = await repo.get_bounties()
    assert len(stored) == 2
@pytest.mark.anyio
async def test_different_payload_not_deduped(repo):
    """Bounties that differ only in ``payload`` are both kept."""
    baseline = dict(_BASE)
    other_payload = dict(_BASE, payload={"username": "root", "password": "toor"})
    await repo.add_bounty(baseline)
    await repo.add_bounty(other_payload)
    stored = await repo.get_bounties()
    assert len(stored) == 2
@pytest.mark.anyio
async def test_flood_protection(repo):
    """Fifty identical submissions collapse into a single stored bounty."""
    submissions = 50
    for _attempt in range(submissions):
        await repo.add_bounty({**_BASE})
    stored = await repo.get_bounties()
    assert len(stored) == 1
@pytest.mark.anyio
async def test_dict_payload_dedup(repo):
    """Two equal dict payloads (pre-serialisation path) are deduplicated."""
    for _attempt in range(2):
        # A fresh but equal payload dict is built for each submission.
        credentials = {"username": "admin", "password": "password"}
        await repo.add_bounty({**_BASE, "payload": credentials})
    stored = await repo.get_bounties()
    assert len(stored) == 1
@pytest.mark.anyio
async def test_string_payload_dedup(repo):
    """Two identical pre-serialised (JSON string) payloads are deduplicated."""
    import json

    serialised = json.dumps({"username": "admin", "password": "password"})
    for _attempt in range(2):
        await repo.add_bounty(dict(_BASE, payload=serialised))
    stored = await repo.get_bounties()
    assert len(stored) == 1

View File

@@ -0,0 +1,606 @@
"""
Tests for the DECNET cross-decky correlation engine.
Covers:
- RFC 5424 line parsing (parser.py)
- Traversal graph data types (graph.py)
- CorrelationEngine ingestion, querying, and reporting (engine.py)
"""
from __future__ import annotations
import json
import re
from datetime import datetime
from decnet.correlation.parser import LogEvent, parse_line
from decnet.correlation.graph import AttackerTraversal, TraversalHop
from decnet.correlation.engine import CorrelationEngine, _fmt_duration
from decnet.logging.syslog_formatter import format_rfc5424, SEVERITY_INFO, SEVERITY_WARNING
# ---------------------------------------------------------------------------
# Fixtures & helpers
# ---------------------------------------------------------------------------
# Three UTC timestamps spaced five minutes apart (10:00, 10:05, 10:10), used
# as "earlier / middle / later" markers throughout the tests.
_TS = "2026-04-04T10:00:00+00:00"
_TS2 = "2026-04-04T10:05:00+00:00"
_TS3 = "2026-04-04T10:10:00+00:00"
def _make_line(
    service: str = "http",
    hostname: str = "decky-01",
    event_type: str = "connection",
    src_ip: str = "1.2.3.4",
    timestamp: str = _TS,
    extra_fields: dict | None = None,
) -> str:
    """Render a DECNET syslog line through the project's RFC 5424 formatter.

    A falsy ``src_ip`` leaves the ``src_ip`` field out entirely.
    ``extra_fields`` are merged in afterwards, so they win on key clashes.
    ``timestamp`` is an ISO-8601 string parsed with ``datetime.fromisoformat``.
    """
    sd_fields = {"src_ip": src_ip} if src_ip else {}
    sd_fields.update(extra_fields or {})
    when = datetime.fromisoformat(timestamp)
    return format_rfc5424(
        service=service,
        hostname=hostname,
        event_type=event_type,
        severity=SEVERITY_INFO,
        timestamp=when,
        **sd_fields,
    )
def _make_line_src(hostname: str, src: str, timestamp: str = _TS) -> str:
    """Render an mssql-style line carrying the peer address as ``src``, not ``src_ip``."""
    when = datetime.fromisoformat(timestamp)
    rendered = format_rfc5424(
        service="mssql",
        hostname=hostname,
        event_type="unknown_packet",
        severity=SEVERITY_INFO,
        timestamp=when,
        src=src,
    )
    return rendered
# ---------------------------------------------------------------------------
# parser.py — parse_line
# ---------------------------------------------------------------------------
class TestParserBasic:
    """Header-level behaviour of ``parse_line``."""

    def test_returns_none_for_blank(self):
        """Empty and whitespace-only input yields no event."""
        for blank in ("", " "):
            assert parse_line(blank) is None

    def test_returns_none_for_non_rfc5424(self):
        """Lines that are not RFC 5424 are rejected."""
        rejected = (
            "this is not a syslog line",
            "Jan 1 00:00:00 host sshd: blah",
        )
        for candidate in rejected:
            assert parse_line(candidate) is None

    def test_returns_log_event(self):
        """A well-formed line parses into a ``LogEvent``."""
        parsed = parse_line(_make_line())
        assert isinstance(parsed, LogEvent)

    def test_hostname_extracted(self):
        """The hostname given to the formatter surfaces as ``decky``."""
        parsed = parse_line(_make_line(hostname="decky-07"))
        assert parsed.decky == "decky-07"

    def test_service_extracted(self):
        """The service given to the formatter surfaces as ``service``."""
        parsed = parse_line(_make_line(service="ftp"))
        assert parsed.service == "ftp"

    def test_event_type_extracted(self):
        """The event type given to the formatter surfaces as ``event_type``."""
        parsed = parse_line(_make_line(event_type="login_attempt"))
        assert parsed.event_type == "login_attempt"

    def test_timestamp_parsed(self):
        """The timestamp round-trips to the same ``datetime`` value."""
        expected = datetime.fromisoformat(_TS)
        parsed = parse_line(_make_line(timestamp=_TS))
        assert parsed.timestamp == expected

    def test_raw_line_preserved(self):
        """``raw`` holds the input line with surrounding whitespace stripped."""
        source_line = _make_line()
        parsed = parse_line(source_line)
        assert parsed.raw == source_line.strip()
class TestParserAttackerIP:
    """How ``parse_line`` derives ``attacker_ip`` and the structured fields."""

    def test_src_ip_field(self):
        """``src_ip`` is used as the attacker IP."""
        parsed = parse_line(_make_line(src_ip="10.0.0.1"))
        assert parsed.attacker_ip == "10.0.0.1"

    def test_src_field_fallback(self):
        """mssql logs use `src` instead of `src_ip`."""
        mssql_line = _make_line_src("decky-win", "192.168.1.5")
        parsed = parse_line(mssql_line)
        assert parsed.attacker_ip == "192.168.1.5"

    def test_no_ip_field_gives_none(self):
        """A line without any IP field still parses, with ``attacker_ip`` None."""
        ipless_line = format_rfc5424("http", "decky-01", "startup", SEVERITY_INFO)
        parsed = parse_line(ipless_line)
        assert parsed is not None
        assert parsed.attacker_ip is None

    def test_extra_fields_in_dict(self):
        """Additional structured-data fields are exposed through ``fields``."""
        credentials = {"username": "root", "password": "admin"}
        parsed = parse_line(_make_line(extra_fields=credentials))
        assert parsed.fields["username"] == "root"
        assert parsed.fields["password"] == "admin"

    def test_src_ip_priority_over_src(self):
        """src_ip should win when both are present."""
        when = datetime.fromisoformat(_TS)
        both_fields_line = format_rfc5424(
            "mssql", "decky-01", "evt", SEVERITY_INFO,
            timestamp=when,
            src_ip="1.1.1.1",
            src="2.2.2.2",
        )
        parsed = parse_line(both_fields_line)
        assert parsed.attacker_ip == "1.1.1.1"

    def test_sd_escape_chars_decoded(self):
        """Escaped characters in SD values should be unescaped."""
        when = datetime.fromisoformat(_TS)
        quoted_path_line = format_rfc5424(
            "http", "decky-01", "evt", SEVERITY_INFO,
            timestamp=when,
            src_ip="1.2.3.4",
            path='/search?q=a"b',
        )
        parsed = parse_line(quoted_path_line)
        assert '"' in parsed.fields["path"]

    def test_nilvalue_hostname_skipped(self):
        """A line built with "-" as the first formatter argument is dropped.

        NOTE(review): other calls in this module pass the service first and
        the hostname second, so this test appears to nil the *service*;
        its name may be swapped with the sibling test. Confirm.
        """
        nil_first_arg = format_rfc5424("-", "decky-01", "evt", SEVERITY_INFO)
        assert parse_line(nil_first_arg) is None

    def test_nilvalue_service_skipped(self):
        """A line built with "-" as the second formatter argument is dropped.

        NOTE(review): the second positional argument looks like the hostname
        elsewhere in this module; this test's name may be swapped with the
        sibling test. Confirm.
        """
        nil_second_arg = format_rfc5424("http", "-", "evt", SEVERITY_INFO)
        assert parse_line(nil_second_arg) is None

    def test_attacker_ip_from_sshd_prose(self):
        """sshd routed via rsyslog has no SD block — IP lives in free prose.
        Anchored "from <ip>" must beat the local listener in
        "Connection from X port Y on Z port 22"."""
        failed_password = (
            "<38>1 2026-04-27T03:08:48+00:00 dmz-gateway sshd - - - "
            "Failed password for root from 157.66.144.16 port 42772 ssh2"
        )
        connection_from = (
            "<38>1 2026-04-27T03:08:45+00:00 dmz-gateway sshd - - - "
            "Connection from 157.66.144.16 port 42772 on 10.0.0.2 port 22"
        )
        pam_rhost = (
            "<38>1 2026-04-27T03:08:46+00:00 dmz-gateway sshd - - - "
            "pam_unix(sshd:auth): authentication failure; rhost=157.66.144.16 user=root"
        )
        cases = [
            (failed_password, "157.66.144.16"),
            (connection_from, "157.66.144.16"),
            (pam_rhost, "157.66.144.16"),
        ]
        for line, expected in cases:
            event = parse_line(line)
            assert event is not None, line
            assert event.attacker_ip == expected, (line, event.attacker_ip)
# ---------------------------------------------------------------------------
# graph.py — AttackerTraversal
# ---------------------------------------------------------------------------
def _make_traversal(ip: str, hops_spec: list[tuple]) -> AttackerTraversal:
    """Build an ``AttackerTraversal`` for ``ip`` from compact hop tuples.

    Each entry of ``hops_spec`` is ``(ts_str, decky, service, event_type)``;
    ``ts_str`` is an ISO-8601 string parsed with ``datetime.fromisoformat``.
    """
    built_hops = []
    for ts_str, decky_name, service_name, event_name in hops_spec:
        hop = TraversalHop(
            timestamp=datetime.fromisoformat(ts_str),
            decky=decky_name,
            service=service_name,
            event_type=event_name,
        )
        built_hops.append(hop)
    return AttackerTraversal(attacker_ip=ip, hops=built_hops)
class TestTraversalGraph:
    """Derived properties and serialisation of ``AttackerTraversal``."""

    def setup_method(self):
        """Build a three-hop traversal over three distinct deckies."""
        three_hops = [
            (_TS, "decky-01", "ssh", "login_attempt"),
            (_TS2, "decky-03", "http", "request"),
            (_TS3, "decky-05", "ftp", "auth_attempt"),
        ]
        self.t = _make_traversal("5.6.7.8", three_hops)

    def test_first_seen(self):
        """``first_seen`` is the timestamp of the earliest hop."""
        earliest = datetime.fromisoformat(_TS)
        assert self.t.first_seen == earliest

    def test_last_seen(self):
        """``last_seen`` is the timestamp of the latest hop."""
        latest = datetime.fromisoformat(_TS3)
        assert self.t.last_seen == latest

    def test_duration_seconds(self):
        """The ten minutes between first and last hop are reported in seconds."""
        assert self.t.duration_seconds == 600.0

    def test_deckies_ordered(self):
        """``deckies`` lists the touched deckies in hop order."""
        expected_order = ["decky-01", "decky-03", "decky-05"]
        assert self.t.deckies == expected_order

    def test_decky_count(self):
        """``decky_count`` is the number of distinct deckies."""
        assert self.t.decky_count == 3

    def test_path_string(self):
        """``path`` joins the deckies with an arrow separator."""
        assert self.t.path == "decky-01 → decky-03 → decky-05"

    def test_to_dict_keys(self):
        """``to_dict`` exposes the summary values and the full hop list."""
        as_dict = self.t.to_dict()
        assert as_dict["attacker_ip"] == "5.6.7.8"
        assert as_dict["decky_count"] == 3
        assert as_dict["hop_count"] == 3
        assert len(as_dict["hops"]) == 3
        assert as_dict["path"] == "decky-01 → decky-03 → decky-05"

    def test_to_dict_hops_structure(self):
        """Each serialised hop carries exactly the four expected keys."""
        first_hop = self.t.to_dict()["hops"][0]
        expected_keys = {"timestamp", "decky", "service", "event_type"}
        assert set(first_hop.keys()) == expected_keys

    def test_repeated_decky_not_double_counted_in_path(self):
        """Revisiting a decky does not add it to ``deckies`` a second time."""
        hops_with_revisit = [
            (_TS, "decky-01", "ssh", "conn"),
            (_TS2, "decky-02", "ftp", "conn"),
            (_TS3, "decky-01", "ssh", "conn"),  # revisit
        ]
        revisiting = _make_traversal("1.1.1.1", hops_with_revisit)
        assert revisiting.deckies == ["decky-01", "decky-02"]
        assert revisiting.decky_count == 2
# ---------------------------------------------------------------------------
# engine.py — CorrelationEngine
# ---------------------------------------------------------------------------
class TestEngineIngestion:
    """Line and file ingestion counters of ``CorrelationEngine``."""

    def test_ingest_returns_event(self):
        """Ingesting a valid line returns the parsed event."""
        result = CorrelationEngine().ingest(_make_line())
        assert result is not None

    def test_ingest_blank_returns_none(self):
        """Ingesting an empty string returns ``None``."""
        result = CorrelationEngine().ingest("")
        assert result is None

    def test_lines_parsed_counter(self):
        """``lines_parsed`` counts every ingested line, parseable or not."""
        engine = CorrelationEngine()
        for raw in (_make_line(), "garbage"):
            engine.ingest(raw)
        assert engine.lines_parsed == 2

    def test_events_indexed_counter(self):
        """Only lines that carry an attacker IP are indexed."""
        engine = CorrelationEngine()
        with_ip = _make_line(src_ip="1.2.3.4")
        without_ip = _make_line(src_ip="")  # no IP
        engine.ingest(with_ip)
        engine.ingest(without_ip)
        assert engine.events_indexed == 1

    def test_ingest_file(self, tmp_path):
        """``ingest_file`` returns 3 for a file holding three valid lines."""
        log_path = tmp_path / "decnet.log"
        content = "\n".join([
            _make_line("ssh", "decky-01", "conn", "10.0.0.1", _TS),
            _make_line("http", "decky-02", "req", "10.0.0.1", _TS2),
            _make_line("ftp", "decky-03", "auth", "10.0.0.1", _TS3),
        ])
        log_path.write_text(content)
        engine = CorrelationEngine()
        ingested = engine.ingest_file(log_path)
        assert ingested == 3
class TestEngineTraversals:
    """Traversal detection, filtering and ordering in ``CorrelationEngine``."""

    def _engine_with(self, specs: list[tuple]) -> CorrelationEngine:
        """Return an engine fed one line per spec.

        Each spec is ``(service, decky, event_type, src_ip, timestamp)``.
        """
        engine = CorrelationEngine()
        for service, hostname, event_type, src_ip, timestamp in specs:
            line = _make_line(
                service=service,
                hostname=hostname,
                event_type=event_type,
                src_ip=src_ip,
                timestamp=timestamp,
            )
            engine.ingest(line)
        return engine

    def test_single_decky_not_a_traversal(self):
        """Repeated hits on one decky do not form a traversal."""
        same_decky_twice = [
            ("ssh", "decky-01", "conn", "1.1.1.1", _TS),
            ("ssh", "decky-01", "conn", "1.1.1.1", _TS2),
        ]
        engine = self._engine_with(same_decky_twice)
        assert engine.traversals() == []

    def test_two_deckies_is_traversal(self):
        """One IP seen on two deckies yields a single two-decky traversal."""
        two_deckies = [
            ("ssh", "decky-01", "conn", "1.1.1.1", _TS),
            ("http", "decky-02", "req", "1.1.1.1", _TS2),
        ]
        found = self._engine_with(two_deckies).traversals()
        assert len(found) == 1
        only = found[0]
        assert only.attacker_ip == "1.1.1.1"
        assert only.decky_count == 2

    def test_prober_event_does_not_count_as_traversal(self):
        """Hit live on first VPS deploy: every fingerprinted attacker
        showed up as a 2-decky traversal because the prober's outbound
        fingerprint events (decky=decnet-prober, target_ip=<attacker>)
        got co-indexed with the attacker's actual decoy hops. The
        prober is internal infrastructure, not a hop — its events
        must not bump the distinct-decky count."""
        decoy_plus_prober = [
            ("ssh", "dmz-gateway", "conn", "1.1.1.1", _TS),
            ("ssh", "decnet-prober", "hassh_fingerprint", "1.1.1.1", _TS2),
        ]
        engine = self._engine_with(decoy_plus_prober)
        # Only one *real* decky touched — no traversal.
        assert engine.traversals() == []

    def test_prober_excluded_from_traversal_path(self):
        """When a real traversal exists, the prober's hops must not
        appear in the path or inflate the decky count."""
        two_decoys_plus_prober = [
            ("ssh", "dmz-gateway", "conn", "1.1.1.1", _TS),
            ("ssh", "decnet-prober", "hassh_fingerprint", "1.1.1.1", _TS2),
            ("http", "decky-internal", "req", "1.1.1.1", _TS3),
        ]
        traversals = self._engine_with(two_decoys_plus_prober).traversals()
        assert len(traversals) == 1
        t = traversals[0]
        assert t.decky_count == 2, (
            f"prober should not inflate decky_count; got {t.decky_count}"
        )
        assert "decnet-prober" not in t.path, (
            f"prober should not appear in traversal path; got {t.path!r}"
        )

    def test_min_deckies_filter(self):
        """``min_deckies`` keeps only traversals touching at least that many deckies."""
        three_deckies = [
            ("ssh", "decky-01", "conn", "1.1.1.1", _TS),
            ("http", "decky-02", "req", "1.1.1.1", _TS2),
            ("ftp", "decky-03", "auth", "1.1.1.1", _TS3),
        ]
        engine = self._engine_with(three_deckies)
        at_least_three = engine.traversals(min_deckies=3)
        assert len(at_least_three) == 1
        at_least_four = engine.traversals(min_deckies=4)
        assert len(at_least_four) == 0

    def test_multiple_attackers_separate_traversals(self):
        """Each attacker IP gets its own traversal."""
        two_attackers = [
            ("ssh", "decky-01", "conn", "1.1.1.1", _TS),
            ("http", "decky-02", "req", "1.1.1.1", _TS2),
            ("ssh", "decky-03", "conn", "9.9.9.9", _TS),
            ("ftp", "decky-04", "auth", "9.9.9.9", _TS2),
        ]
        found = self._engine_with(two_attackers).traversals()
        assert len(found) == 2
        seen_ips = {traversal.attacker_ip for traversal in found}
        assert seen_ips == {"1.1.1.1", "9.9.9.9"}

    def test_traversals_sorted_by_first_seen(self):
        """Traversals are returned earliest ``first_seen`` first."""
        later_attacker_first = [
            ("ssh", "decky-01", "conn", "9.9.9.9", _TS2),  # later
            ("ftp", "decky-02", "auth", "9.9.9.9", _TS3),
            ("http", "decky-03", "req", "1.1.1.1", _TS),  # earlier
            ("smb", "decky-04", "auth", "1.1.1.1", _TS2),
        ]
        found = self._engine_with(later_attacker_first).traversals()
        assert found[0].attacker_ip == "1.1.1.1"
        assert found[1].attacker_ip == "9.9.9.9"

    def test_hops_ordered_chronologically(self):
        """Hops follow event timestamps, not ingestion order."""
        out_of_order = [
            ("ftp", "decky-02", "auth", "5.5.5.5", _TS2),  # ingested first but later ts
            ("ssh", "decky-01", "conn", "5.5.5.5", _TS),
        ]
        first_traversal = self._engine_with(out_of_order).traversals()[0]
        first_hop, second_hop = first_traversal.hops[0], first_traversal.hops[1]
        assert first_hop.decky == "decky-01"
        assert second_hop.decky == "decky-02"

    def test_all_attackers(self):
        """``all_attackers`` maps each IP to its number of events."""
        events = [
            ("ssh", "decky-01", "conn", "1.1.1.1", _TS),
            ("ssh", "decky-01", "conn", "1.1.1.1", _TS2),
            ("ssh", "decky-01", "conn", "2.2.2.2", _TS),
        ]
        per_ip = self._engine_with(events).all_attackers()
        assert per_ip["1.1.1.1"] == 2
        assert per_ip["2.2.2.2"] == 1

    def test_mssql_src_field_correlated(self):
        """Verify that `src=` (mssql style) is picked up for cross-decky correlation."""
        engine = CorrelationEngine()
        for hostname, timestamp in (("decky-win1", _TS), ("decky-win2", _TS2)):
            engine.ingest(_make_line_src(hostname, "10.10.10.5", timestamp))
        found = engine.traversals()
        assert len(found) == 1
        assert found[0].decky_count == 2
class TestEngineReporting:
    """JSON, table and syslog reporting of detected traversals."""

    def _two_decky_engine(self) -> CorrelationEngine:
        """Return an engine in which 3.3.3.3 has touched two deckies."""
        engine = CorrelationEngine()
        engine.ingest(_make_line("ssh", "decky-01", "conn", "3.3.3.3", _TS))
        engine.ingest(_make_line("http", "decky-02", "req", "3.3.3.3", _TS2))
        return engine

    def test_report_json_structure(self):
        """The JSON report has ``stats`` and ``traversals`` with matching content."""
        engine = self._two_decky_engine()
        report = engine.report_json()
        assert "stats" in report
        assert "traversals" in report
        assert report["stats"]["traversals"] == 1
        t = report["traversals"][0]
        assert t["attacker_ip"] == "3.3.3.3"
        assert t["decky_count"] == 2

    def test_report_json_serialisable(self):
        """The JSON report can be passed to ``json.dumps``."""
        engine = self._two_decky_engine()
        # Should not raise
        json.dumps(engine.report_json())

    def test_report_table_returns_rich_table(self):
        """``report_table`` returns a ``rich.table.Table``."""
        from rich.table import Table

        engine = self._two_decky_engine()
        table = engine.report_table()
        assert isinstance(table, Table)

    def test_traversal_syslog_lines_count(self):
        """One traversal produces one syslog line."""
        engine = self._two_decky_engine()
        lines = engine.traversal_syslog_lines()
        assert len(lines) == 1

    def test_traversal_syslog_line_is_rfc5424(self):
        """The emitted line starts with the expected RFC 5424 header."""
        engine = self._two_decky_engine()
        line = engine.traversal_syslog_lines()[0]
        # Must match RFC 5424 header
        assert re.match(r"^<\d+>1 \S+ \S+ correlator - traversal_detected", line)

    def test_traversal_syslog_contains_attacker_ip(self):
        """The emitted line mentions the attacker IP."""
        engine = self._two_decky_engine()
        line = engine.traversal_syslog_lines()[0]
        assert "3.3.3.3" in line

    def test_traversal_syslog_severity_is_warning(self):
        """The PRI value encodes facility 16 (local0) with warning severity."""
        engine = self._two_decky_engine()
        line = engine.traversal_syslog_lines()[0]
        pri_match = re.match(r"^<(\d+)>", line)
        # Fail with the offending line instead of an AttributeError on None
        # when the PRI header is missing.
        assert pri_match is not None, f"no PRI header in {line!r}"
        pri = int(pri_match.group(1))
        assert pri == 16 * 8 + SEVERITY_WARNING  # local0 + warning

    def test_no_traversals_empty_json(self):
        """With a single decky touched, both report forms are empty."""
        engine = CorrelationEngine()
        engine.ingest(_make_line())  # single decky, no traversal
        assert engine.report_json()["stats"]["traversals"] == 0
        assert engine.traversal_syslog_lines() == []
# ---------------------------------------------------------------------------
# _fmt_duration helper
# ---------------------------------------------------------------------------
class TestFmtDuration:
    """Unit selection of the ``_fmt_duration`` helper."""

    def test_seconds(self):
        """45 seconds is rendered in seconds."""
        rendered = _fmt_duration(45)
        assert rendered == "45s"

    def test_minutes(self):
        """90 seconds is rendered as fractional minutes."""
        rendered = _fmt_duration(90)
        assert rendered == "1.5m"

    def test_hours(self):
        """7200 seconds is rendered as fractional hours."""
        rendered = _fmt_duration(7200)
        assert rendered == "2.0h"
# ---------------------------------------------------------------------------
# Mutation-event stream (parser kind + engine index + graph markers)
# ---------------------------------------------------------------------------
def _mutation_line(
    decky: str,
    *,
    old: str = "",
    new: str = "ssh",
    trigger: str = "scheduled",
    timestamp: str = _TS,
) -> str:
    """Render a ``decky_mutated`` line emitted by the ``mutator`` service.

    ``decky`` is used both as the syslog hostname and as the ``decky`` field;
    ``old`` and ``new`` become ``old_services`` and ``new_services``.
    """
    when = datetime.fromisoformat(timestamp)
    mutation_fields = {
        "decky": decky,
        "old_services": old,
        "new_services": new,
        "trigger": trigger,
    }
    return format_rfc5424(
        service="mutator",
        hostname=decky,
        event_type="decky_mutated",
        severity=SEVERITY_INFO,
        timestamp=when,
        **mutation_fields,
    )
class TestParserMutationKind:
    """Classification of parsed events into ``mutation`` and ``attacker`` kinds."""

    def test_mutator_line_kind_is_mutation(self):
        """A ``decky_mutated`` line from the mutator service is a mutation."""
        line = _mutation_line(
            "decky-01", old="ssh", new="rdp", trigger="scheduled",
        )
        parsed = parse_line(line)
        assert parsed is not None
        assert parsed.kind == "mutation"

    def test_default_kind_is_attacker(self):
        """An ordinary service line is classified as attacker activity."""
        parsed = parse_line(_make_line())
        assert parsed is not None
        assert parsed.kind == "attacker"

    def test_non_mutator_service_stays_attacker(self):
        """The event type alone does not make a line a mutation."""
        # Same event_type but different service ⇒ not a mutation
        when = datetime.fromisoformat(_TS)
        ssh_line = format_rfc5424(
            service="ssh",
            hostname="decky-01",
            event_type="decky_mutated",
            severity=SEVERITY_INFO,
            timestamp=when,
            src_ip="1.1.1.1",
        )
        parsed = parse_line(ssh_line)
        assert parsed is not None
        assert parsed.kind == "attacker"
class TestEngineMutationIndex:
    """Indexing of mutation events and their attachment to traversals."""

    @staticmethod
    def _engine_with_mid_window_mutation() -> CorrelationEngine:
        """Return an engine where decky-01 mutates between two attacker hops.

        The attacker 9.9.9.9 hits decky-01 at ``_TS`` and decky-02 at
        ``_TS3``; decky-01 mutates from ssh to rdp at ``_TS2``.
        """
        engine = CorrelationEngine()
        engine.ingest(
            _make_line(hostname="decky-01", src_ip="9.9.9.9", timestamp=_TS)
        )
        engine.ingest(
            _mutation_line(
                "decky-01", old="ssh", new="rdp",
                trigger="scheduled", timestamp=_TS2,
            )
        )
        engine.ingest(
            _make_line(hostname="decky-02", src_ip="9.9.9.9", timestamp=_TS3)
        )
        return engine

    def test_mutation_indexed_separately(self):
        """Mutations are counted and stored apart from attacker events."""
        engine = CorrelationEngine()
        engine.ingest(_mutation_line("decky-01", old="ssh", new="rdp"))
        assert engine.mutations_indexed == 1
        assert engine.events_indexed == 0
        assert "decky-01" in engine._mutations
        assert "decky-01" not in engine._events

    def test_mutations_interleaved_into_traversal(self):
        """A mutation on a touched decky inside the window is attached."""
        # Attacker hits decky-01 and decky-02; decky-01 mutates in between
        found = self._engine_with_mid_window_mutation().traversals()
        assert len(found) == 1
        during = found[0].mutations_during
        assert len(during) == 1
        mutation = during[0]
        assert mutation.decky == "decky-01"
        assert mutation.old_services == ["ssh"]
        assert mutation.new_services == ["rdp"]
        assert mutation.trigger == "scheduled"

    def test_mutation_outside_window_excluded(self):
        """A mutation before the attacker's first contact is not attached."""
        engine = CorrelationEngine()
        # Mutation at _TS — before attacker first_seen at _TS2
        engine.ingest(
            _mutation_line(
                "decky-01", old="", new="ssh",
                trigger="creation", timestamp=_TS,
            )
        )
        engine.ingest(
            _make_line(hostname="decky-01", src_ip="9.9.9.9", timestamp=_TS2)
        )
        engine.ingest(
            _make_line(hostname="decky-02", src_ip="9.9.9.9", timestamp=_TS3)
        )
        first_traversal = engine.traversals()[0]
        # The creation happened BEFORE first contact, so it's not "during"
        assert first_traversal.mutations_during == []

    def test_mutation_on_untouched_decky_excluded(self):
        """A mutation on a decky the attacker never hit is not attached."""
        engine = CorrelationEngine()
        engine.ingest(
            _make_line(hostname="decky-01", src_ip="9.9.9.9", timestamp=_TS)
        )
        engine.ingest(
            _make_line(hostname="decky-02", src_ip="9.9.9.9", timestamp=_TS3)
        )
        # decky-03 mutates mid-window but the attacker never touched it
        engine.ingest(
            _mutation_line(
                "decky-03", old="ftp", new="smtp",
                trigger="operator", timestamp=_TS2,
            )
        )
        first_traversal = engine.traversals()[0]
        assert first_traversal.mutations_during == []

    def test_to_dict_includes_timeline_with_markers(self):
        """``to_dict`` lists the mutation and a hop/mutation/hop timeline."""
        engine = self._engine_with_mid_window_mutation()
        as_dict = engine.traversals()[0].to_dict()
        assert len(as_dict["mutations_during"]) == 1
        assert as_dict["mutations_during"][0]["trigger"] == "scheduled"
        timeline_kinds = []
        for entry in as_dict["timeline"]:
            timeline_kinds.append(entry["kind"])
        assert timeline_kinds == ["hop", "mutation", "hop"]

    def test_report_json_serialisable_with_mutations(self):
        """The JSON report stays serialisable when mutations are attached."""
        engine = self._engine_with_mid_window_mutation()
        json.dumps(engine.report_json())  # must not raise

View File

@@ -0,0 +1,152 @@
"""Bus wiring for the correlation engine (DEBT-031, worker 3).
The correlator is not a standalone worker — ``CorrelationEngine`` is a
batch class instantiated inside the profiler worker. DEBT-031 wires it
via an optional ``publish_fn`` constructor arg: on the first sighting of
an attacker IP, the engine emits ``("observed", payload)`` through the
hook. The profiler worker carries the bus physically and translates
those sync hook calls into ``attacker.observed`` publishes.
"""
from __future__ import annotations
import asyncio
from collections.abc import AsyncIterator
from datetime import datetime

import pytest
import pytest_asyncio

from decnet.bus import topics as _topics
from decnet.bus.fake import FakeBus
from decnet.bus.publish import make_thread_safe_publisher
from decnet.correlation.engine import CorrelationEngine
from decnet.logging.syslog_formatter import SEVERITY_INFO, format_rfc5424
# Fixed timestamp given to every syslog line built by ``_line`` below.
_TS = "2026-04-21T10:00:00+00:00"
def _line(ip: str = "1.2.3.4", decky: str = "decky-01", event_type: str = "connection") -> str:
    """Render an ``http`` syslog line for ``ip`` on ``decky`` at the fixed ``_TS``."""
    when = datetime.fromisoformat(_TS)
    rendered = format_rfc5424(
        service="http",
        hostname=decky,
        event_type=event_type,
        severity=SEVERITY_INFO,
        timestamp=when,
        src_ip=ip,
    )
    return rendered
@pytest_asyncio.fixture
async def bus() -> AsyncIterator[FakeBus]:
    """Yield a connected ``FakeBus`` and close it after the test.

    The function contains ``yield``, so it is an async generator; the return
    annotation describes what it yields rather than a plain ``FakeBus``.
    """
    b = FakeBus()
    await b.connect()
    yield b
    # Teardown: runs once the requesting test has finished.
    await b.close()
# ─── Engine-level publish hook ───────────────────────────────────────────────
def test_engine_publishes_once_on_first_sighting() -> None:
    """Only the first event for an attacker IP reaches the publish hook."""
    captured: list[tuple[str, dict]] = []

    def record(event_type, payload):
        captured.append((event_type, payload))

    engine = CorrelationEngine(publish_fn=record)
    # Same IP three times: only the first should publish.
    sightings = (
        _line(ip="9.9.9.9"),
        _line(ip="9.9.9.9", event_type="login"),
        _line(ip="9.9.9.9", decky="decky-02"),
    )
    for sighting in sightings:
        engine.ingest(sighting)

    assert len(captured) == 1
    kind, body = captured[0]
    assert kind == "observed"
    assert body["attacker_ip"] == "9.9.9.9"
    assert body["decky"] == "decky-01"
    assert body["service"] == "http"
    assert body["event_type"] == "connection"
    assert body["first_seen"].startswith("2026-04-21T10:00:00")
def test_engine_publishes_per_unique_ip() -> None:
    """Each distinct IP publishes once, in order of first sighting."""
    captured: list[tuple[str, dict]] = []

    def record(event_type, payload):
        captured.append((event_type, payload))

    engine = CorrelationEngine(publish_fn=record)
    # The second 1.1.1.1 is a duplicate and must not publish.
    for ip in ("1.1.1.1", "2.2.2.2", "1.1.1.1", "3.3.3.3"):
        engine.ingest(_line(ip=ip))

    published_ips = []
    for _kind, body in captured:
        published_ips.append(body["attacker_ip"])
    assert published_ips == ["1.1.1.1", "2.2.2.2", "3.3.3.3"]
def test_engine_swallows_publish_fn_failures() -> None:
    """A raising publish hook neither aborts ingestion nor skips indexing."""

    # A publish hook that blows up must never break ingestion.
    def _boom(_event_type, _payload):
        raise RuntimeError("transport exploded")

    engine = CorrelationEngine(publish_fn=_boom)
    ingested = engine.ingest(_line(ip="5.5.5.5"))

    assert ingested is not None
    assert engine.events_indexed == 1
def test_engine_runs_unchanged_without_publish_fn() -> None:
    """Without a publish hook the engine still indexes every event."""
    # Pre-bus behavior. No hook, no publishes, same indexing result.
    engine = CorrelationEngine()
    for _repeat in range(2):
        engine.ingest(_line(ip="7.7.7.7"))
    assert engine.events_indexed == 2
def test_engine_ignores_lines_without_attacker_ip() -> None:
    """A line carrying no ``src_ip`` field never triggers the publish hook."""
    captured: list[tuple[str, dict]] = []

    def record(event_type, payload):
        captured.append((event_type, payload))

    engine = CorrelationEngine(publish_fn=record)
    # Built without a src_ip field, so there is no attacker IP to report.
    when = datetime.fromisoformat(_TS)
    line_no_ip = format_rfc5424(
        service="http",
        hostname="decky-01",
        event_type="boot",
        severity=SEVERITY_INFO,
        timestamp=when,
    )
    engine.ingest(line_no_ip)
    assert captured == []
# ─── End-to-end through the bus ──────────────────────────────────────────────
@pytest.mark.asyncio
async def test_correlator_publishes_on_attacker_observed_topic(bus: FakeBus) -> None:
    """A first sighting travels through the bus on ``attacker.observed``."""
    running_loop = asyncio.get_running_loop()
    thread_safe_publish = make_thread_safe_publisher(bus, running_loop)

    def publish(event_type: str, payload: dict) -> None:
        # Adapt the engine's (event_type, payload) hook to the bus publisher.
        topic = _topics.attacker(_topics.ATTACKER_OBSERVED)
        thread_safe_publish(topic, payload, event_type)

    engine = CorrelationEngine(publish_fn=publish)
    subscription = bus.subscribe("attacker.observed")
    async with subscription:
        engine.ingest(_line(ip="8.8.8.8"))
        received = await asyncio.wait_for(subscription.__anext__(), timeout=2.0)

    assert received.topic == "attacker.observed"
    assert received.type == "observed"
    assert received.payload["attacker_ip"] == "8.8.8.8"
@pytest.mark.asyncio
async def test_correlator_degrades_cleanly_when_bus_disabled(monkeypatch: pytest.MonkeyPatch) -> None:
    """With the bus disabled, connect, publish and close all complete without raising."""
    # DECNET_BUS_ENABLED=false returns NullBus; connect()+publish must never raise.
    from decnet.bus.factory import get_bus

    monkeypatch.setenv("DECNET_BUS_ENABLED", "false")
    disabled_bus = get_bus(client_name="profiler")
    await disabled_bus.connect()
    observed = {"attacker_ip": "1.2.3.4"}
    await disabled_bus.publish("attacker.observed", observed, event_type="observed")
    await disabled_bus.close()

View File

@@ -0,0 +1,310 @@
"""Credential-reuse correlator tests.
Covers:
- ``CorrelationEngine.correlate_credential_reuse`` — group detection,
threshold gating, idempotency on a second call.
- ``run_reuse_loop`` — bus-driven wake, reuse.detected publish on
insert/grow, clean shutdown via the *shutdown* signal.
- Repo helper ``find_credential_reuse_candidates`` — used by the engine.
"""
from __future__ import annotations
import asyncio
import contextlib
import hashlib
from pathlib import Path
import pytest
from decnet.bus import topics as _topics
from decnet.bus.fake import FakeBus
from decnet.correlation.engine import CorrelationEngine
from decnet.correlation.reuse_worker import run_reuse_loop
from decnet.web.db.factory import get_repository
def _sha256(s: str) -> str:
return hashlib.sha256(s.encode("utf-8")).hexdigest()
@pytest.fixture
async def repo(tmp_path: Path):
    """Provide an initialised repository backed by a per-test ``reuse_corr.db`` file."""
    db_file = tmp_path / "reuse_corr.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
async def _seed_credential(repo, **overrides):
    """Upsert one credential built from fixed defaults plus ``overrides``.

    Keyword arguments replace the matching default keys. The return value is
    whatever ``repo.upsert_credential`` returns.
    """
    record = {
        "attacker_ip": "10.0.0.5",
        "decky_name": "decky-01",
        "service": "ssh",
        "principal": "root",
        "secret_kind": "plaintext",
        "secret_sha256": _sha256("hunter2"),
        "secret_b64": "aHVudGVyMg==",
        "secret_printable": "hunter2",
        "fields": {},
        **overrides,
    }
    return await repo.upsert_credential(record)
# ─── find_credential_reuse_candidates ────────────────────────────────────────
class TestFindCandidates:
    """Grouping rules of ``find_credential_reuse_candidates``."""

    @pytest.mark.anyio
    async def test_below_threshold_excluded(self, repo) -> None:
        """A secret seen on a single target is not a candidate."""
        digest = _sha256("solo")
        await _seed_credential(
            repo, secret_sha256=digest, decky_name="d1", service="ssh",
        )
        candidates = await repo.find_credential_reuse_candidates(min_targets=2)
        assert candidates == []

    @pytest.mark.anyio
    async def test_threshold_exact_match_included(self, repo) -> None:
        """A secret seen on exactly ``min_targets`` targets forms one group."""
        digest = _sha256("p4ss")
        for decky_name, service in (("d1", "ssh"), ("d2", "ftp")):
            await _seed_credential(
                repo, secret_sha256=digest, decky_name=decky_name, service=service,
            )
        candidates = await repo.find_credential_reuse_candidates(min_targets=2)
        assert len(candidates) == 1
        group = candidates[0]
        assert group["secret_sha256"] == digest
        assert group["secret_kind"] == "plaintext"
        assert group["target_count"] == 2
        assert len(group["credentials"]) == 2

    @pytest.mark.anyio
    async def test_distinct_principals_form_distinct_groups(self, repo) -> None:
        """Same secret + different principals → two separate groups."""
        digest = _sha256("hunter2")
        targets = (("d1", "ssh"), ("d2", "ftp"))
        for principal in ("root", "admin"):
            for decky_name, service in targets:
                await _seed_credential(
                    repo, secret_sha256=digest, principal=principal,
                    decky_name=decky_name, service=service,
                )
        candidates = await repo.find_credential_reuse_candidates(min_targets=2)
        principals = sorted(group["principal"] for group in candidates)
        assert principals == ["admin", "root"]

    @pytest.mark.anyio
    async def test_repeated_decky_service_does_not_count_twice(self, repo) -> None:
        """A repeat attempt on the same (decky, service) doesn't pad target_count."""
        digest = _sha256("h2")
        # Two attempts on the same decky/service → upsert dedups.
        for _attempt in range(2):
            await _seed_credential(
                repo, secret_sha256=digest, decky_name="d1", service="ssh",
            )
        candidates = await repo.find_credential_reuse_candidates(min_targets=2)
        assert candidates == []
# ─── CorrelationEngine.correlate_credential_reuse ────────────────────────────
class TestEngineCorrelate:
@pytest.mark.anyio
async def test_emits_reuse_for_qualifying_group(self, repo) -> None:
sha = _sha256("hunter2")
await _seed_credential(repo, secret_sha256=sha, decky_name="d1", service="ssh")
await _seed_credential(repo, secret_sha256=sha, decky_name="d2", service="ftp")
engine = CorrelationEngine()
results = await engine.correlate_credential_reuse(repo, min_targets=2)
assert len(results) >= 1
assert any(r.get("inserted") for r in results)
total, rows = await repo.list_credential_reuses(min_target_count=2)
assert total == 1
assert rows[0]["target_count"] == 2
@pytest.mark.anyio
async def test_below_threshold_persists_nothing(self, repo) -> None:
sha = _sha256("loner")
await _seed_credential(repo, secret_sha256=sha, decky_name="d1", service="ssh")
engine = CorrelationEngine()
results = await engine.correlate_credential_reuse(repo, min_targets=2)
assert results == []
total, _ = await repo.list_credential_reuses(min_target_count=2)
assert total == 0
@pytest.mark.anyio
async def test_idempotent_on_second_run(self, repo) -> None:
"""A second call with no new credentials returns no
insert/grow rows and leaves the table at the same row count.
"""
sha = _sha256("idempotent")
await _seed_credential(repo, secret_sha256=sha, decky_name="d1", service="ssh")
await _seed_credential(repo, secret_sha256=sha, decky_name="d2", service="ftp")
engine = CorrelationEngine()
await engine.correlate_credential_reuse(repo, min_targets=2)
before_total, _ = await repo.list_credential_reuses(min_target_count=2)
results2 = await engine.correlate_credential_reuse(repo, min_targets=2)
after_total, _ = await repo.list_credential_reuses(min_target_count=2)
assert before_total == after_total == 1
assert results2 == []
@pytest.mark.anyio
async def test_list_and_get_enrich_with_secret(self, repo) -> None:
"""``list_credential_reuses`` and ``get_credential_reuse_by_id``
must surface ``secret_printable`` + ``secret_b64`` from the
underlying ``Credential`` rows so the dashboard drawer can show
the actual secret instead of just its sha256.
"""
sha = _sha256("hunter2")
await _seed_credential(repo, secret_sha256=sha, decky_name="d1", service="ssh")
await _seed_credential(repo, secret_sha256=sha, decky_name="d2", service="ftp")
engine = CorrelationEngine()
await engine.correlate_credential_reuse(repo, min_targets=2)
_, rows = await repo.list_credential_reuses(min_target_count=2)
assert rows[0]["secret_printable"] == "hunter2"
assert rows[0]["secret_b64"] == "aHVudGVyMg=="
single = await repo.get_credential_reuse_by_id(rows[0]["id"])
assert single is not None
assert single["secret_printable"] == "hunter2"
assert single["secret_b64"] == "aHVudGVyMg=="
@pytest.mark.anyio
async def test_growth_emits_changed(self, repo) -> None:
    """A reuse group that gains a third target is reported as ``changed``.

    After an initial correlation over two targets, seeding a third
    target and correlating again must yield at least one result row
    flagged ``changed`` and a stored ``target_count`` of 3.
    """
    secret_hash = _sha256("grower")

    async def seed(decky: str, svc: str) -> None:
        # Every seeded credential shares the same secret hash.
        await _seed_credential(
            repo, secret_sha256=secret_hash, decky_name=decky, service=svc,
        )

    await seed("d1", "ssh")
    await seed("d2", "ftp")

    correlator = CorrelationEngine()
    await correlator.correlate_credential_reuse(repo, min_targets=2)

    await seed("d3", "rdp")
    second_pass = await correlator.correlate_credential_reuse(repo, min_targets=2)
    assert any(entry.get("changed") for entry in second_pass)

    _, stored = await repo.list_credential_reuses(min_target_count=2)
    assert stored[0]["target_count"] == 3
# ─── run_reuse_loop ──────────────────────────────────────────────────────────
class TestRunReuseLoop:
    """Checks what ``run_reuse_loop`` publishes, using a ``FakeBus``.

    Each test replaces the worker module's ``get_bus`` with a lambda
    returning a connected ``FakeBus``, seeds credentials, starts the loop
    as a background task and inspects the ``credential.reuse.detected``
    subscription.

    Teardown runs in ``finally`` blocks so that a failing assertion or a
    timeout cannot leak the background task or leave the bus open.
    """

    @staticmethod
    async def _stop_loop(shutdown: asyncio.Event, task: asyncio.Task) -> None:
        """Signal shutdown, cancel the loop task and wait for it to end."""
        shutdown.set()
        task.cancel()
        # Cancellation is the expected way for the task to finish here.
        with contextlib.suppress(asyncio.CancelledError):
            await task

    @pytest.mark.anyio
    async def test_publishes_reuse_detected_on_insert(self, repo, monkeypatch) -> None:
        """One ``credential.reuse.detected`` per new CredentialReuse row."""
        bus = FakeBus()
        await bus.connect()
        try:
            # Force the worker to pick up our FakeBus.
            from decnet.correlation import reuse_worker as _rw
            monkeypatch.setattr(_rw, "get_bus", lambda client_name=None: bus)

            sha = _sha256("loop-insert")
            await _seed_credential(repo, secret_sha256=sha, decky_name="d1", service="ssh")
            await _seed_credential(repo, secret_sha256=sha, decky_name="d2", service="ftp")

            sub = bus.subscribe(_topics.credential(_topics.CREDENTIAL_REUSE_DETECTED))
            shutdown = asyncio.Event()
            task = asyncio.create_task(run_reuse_loop(
                repo, poll_interval_secs=60.0, min_targets=2, shutdown=shutdown,
            ))
            try:
                # Wait for the first tick to publish.
                async with sub:
                    event = await asyncio.wait_for(sub.__anext__(), timeout=5.0)
                assert event.topic == _topics.credential(_topics.CREDENTIAL_REUSE_DETECTED)
                assert event.payload["target_count"] == 2
                assert event.payload["secret_kind"] == "plaintext"
            finally:
                await self._stop_loop(shutdown, task)
        finally:
            await bus.close()

    @pytest.mark.anyio
    async def test_no_reuse_no_publish(self, repo, monkeypatch) -> None:
        """A loop with no qualifying groups publishes nothing on its tick."""
        bus = FakeBus()
        await bus.connect()
        try:
            from decnet.correlation import reuse_worker as _rw
            monkeypatch.setattr(_rw, "get_bus", lambda client_name=None: bus)

            # A single target never reaches min_targets=2.
            sha = _sha256("loner-loop")
            await _seed_credential(repo, secret_sha256=sha, decky_name="d1", service="ssh")

            sub = bus.subscribe(_topics.credential(_topics.CREDENTIAL_REUSE_DETECTED))
            shutdown = asyncio.Event()
            task = asyncio.create_task(run_reuse_loop(
                repo, poll_interval_secs=0.05, min_targets=2, shutdown=shutdown,
            ))
            try:
                # Let the loop run a few ticks.
                await asyncio.sleep(0.3)
                async with sub:
                    with pytest.raises(asyncio.TimeoutError):
                        await asyncio.wait_for(sub.__anext__(), timeout=0.1)
            finally:
                await self._stop_loop(shutdown, task)
        finally:
            await bus.close()

    @pytest.mark.anyio
    async def test_no_duplicate_publish_on_second_tick(
        self, repo, monkeypatch,
    ) -> None:
        """A subsequent tick with no new credentials must not republish."""
        bus = FakeBus()
        await bus.connect()
        try:
            from decnet.correlation import reuse_worker as _rw
            monkeypatch.setattr(_rw, "get_bus", lambda client_name=None: bus)

            sha = _sha256("once")
            await _seed_credential(repo, secret_sha256=sha, decky_name="d1", service="ssh")
            await _seed_credential(repo, secret_sha256=sha, decky_name="d2", service="ftp")

            sub = bus.subscribe(_topics.credential(_topics.CREDENTIAL_REUSE_DETECTED))
            shutdown = asyncio.Event()
            task = asyncio.create_task(run_reuse_loop(
                repo, poll_interval_secs=0.05, min_targets=2, shutdown=shutdown,
            ))
            try:
                async with sub:
                    # Drain the first publish (the insert).
                    await asyncio.wait_for(sub.__anext__(), timeout=5.0)
                    # Subsequent ticks must produce no further publishes.
                    with pytest.raises(asyncio.TimeoutError):
                        await asyncio.wait_for(sub.__anext__(), timeout=0.3)
            finally:
                await self._stop_loop(shutdown, task)
        finally:
            await bus.close()

View File

@@ -0,0 +1,91 @@
"""Classifier unit tests for decnet.correlation.event_kinds."""
from __future__ import annotations
from decnet.correlation.event_kinds import (
INTERACTION_EVENT_TYPES,
NOISE_EVENT_TYPES,
bucket_services,
classify_event,
)
def test_shell_family_classifies_as_interaction():
    """Shell-family verbs (commands, queries, exec) classify as interaction."""
    shell_verbs = ("command", "shell_input", "sql_query", "redis_command", "exec")
    verdicts = {verb: classify_event(verb) for verb in shell_verbs}
    assert verdicts == dict.fromkeys(shell_verbs, "interaction")
def test_smtp_engagement_classifies_as_interaction():
    """SMTP engagement verbs classify as interaction."""
    smtp_verbs = ["mail_from", "rcpt_to", "message_accepted"]
    for verb in smtp_verbs:
        verdict = classify_event(verb)
        # The verb is the assertion message so a failure names the culprit.
        assert verdict == "interaction", verb
def test_file_and_pubsub_classify_as_interaction():
    """File-transfer and pub/sub verbs classify as interaction."""
    verbs = ("file_captured", "upload", "retr", "publish", "subscribe")
    misclassified = [
        verb for verb in verbs if not classify_event(verb) == "interaction"
    ]
    assert not misclassified, misclassified
def test_noise_events_classify_as_noise():
    """Lifecycle and parse-failure verbs classify as noise."""
    noise_verbs = ("startup", "shutdown", "parse_error", "unknown_command")
    verdicts = [(verb, classify_event(verb)) for verb in noise_verbs]
    assert verdicts == [(verb, "noise") for verb in noise_verbs]
def test_scan_touch_events_classify_as_scan():
    """Touch-level verbs classify as scan.

    These are common template verbs that neither cross into interaction
    nor appear on the noise list.
    """
    touch_verbs = [
        "connection",
        "disconnect",
        "tls_client_hello",
        "auth_attempt",
        "banner",
        "get_request",
        "head_request",
    ]
    for verb in touch_verbs:
        verdict = classify_event(verb)
        assert verdict == "scan", verb
def test_unknown_event_defaults_to_scan():
    """Unrecognised verbs, including the empty string, fall back to scan.

    Conservative default: a verb from a new template should surface as
    "scanned" rather than be over-credited as interaction.
    """
    for unrecognised in ("some_future_verb", ""):
        assert classify_event(unrecognised) == "scan"
def test_interaction_and_noise_sets_are_disjoint():
    """No event type may be listed as both interaction and noise."""
    overlap = [evt for evt in NOISE_EVENT_TYPES if evt in INTERACTION_EVENT_TYPES]
    assert overlap == []
def test_bucket_services_single_interaction_wins():
    """A service with scan and interaction events counts as interacted only."""
    ssh_events = [
        ("ssh", "connection"),   # scan-level
        ("ssh", "shell_input"),  # interaction-level, takes precedence
    ]
    buckets = bucket_services(ssh_events)
    expected = {"interacted": ["ssh"], "scanned": []}
    assert buckets == expected
def test_bucket_services_noise_only_service_dropped():
    """A service that produced only noise events appears in neither bucket."""
    noise_only = [("bus", verb) for verb in ("startup", "shutdown")]
    expected = {"interacted": [], "scanned": []}
    assert bucket_services(noise_only) == expected
def test_bucket_services_mixed_realistic():
    """One attacker's events across four services land in the right buckets.

    ssh (promoted by ``shell_input``) and ftp (``retr``) are interacted;
    http and mongo only ever see scan-level events.
    """
    events_by_service = {
        "http": ["connection", "get_request"],
        "ssh": ["connection", "auth_attempt", "shell_input"],
        "ftp": ["retr"],
        "mongo": ["connection"],
    }
    # Flatten to (service, event) pairs, keeping insertion order.
    pairs = [
        (service, verb)
        for service, verbs in events_by_service.items()
        for verb in verbs
    ]
    buckets = bucket_services(pairs)
    assert buckets["interacted"] == ["ftp", "ssh"]
    assert buckets["scanned"] == ["http", "mongo"]
def test_bucket_services_empty_input():
    """No input pairs yields two empty buckets."""
    buckets = bucket_services([])
    assert buckets == dict(interacted=[], scanned=[])
def test_bucket_services_returns_sorted_lists():
    """Each bucket lists its services alphabetically, whatever the input order."""
    unordered = [("zzz", "command"), ("aaa", "command"), ("mmm", "connection")]
    buckets = bucket_services(unordered)
    interacted, scanned = buckets["interacted"], buckets["scanned"]
    assert interacted == ["aaa", "zzz"]  # alphabetical
    assert scanned == ["mmm"]