Files
DECNET/tests/collector/test_session_aggregator.py
anti f2b3393669 chore: relicense to AGPL-3.0-or-later and add SPDX headers
Replaces LICENSE (GPLv3 -> AGPLv3) and prepends
`SPDX-License-Identifier: AGPL-3.0-or-later` to every source file
across decnet/, decnet_web/, tests/, scripts/, and tools/.

Rationale: closes the GPLv3 ASP loophole so any party operating a
modified DECNET as a network service must offer their modified
source. Personal copyright (Samuel Paschuan) + inbound=outbound
contributions make a future unilateral relicense infeasible.

- LICENSE: full AGPL-3.0 text (gnu.org/licenses/agpl-3.0.txt)
- COPYRIGHT: project copyright notice
- tools/add_spdx_headers.py: idempotent header injector
  (shebang- and PEP 263-aware)

Touches 1565 source files (.py, .ts, .tsx, .js, .jsx, .css, .sh).
No behavior change; comments only.
2026-05-22 21:04:16 -04:00

309 lines
11 KiB
Python

# SPDX-License-Identifier: AGPL-3.0-or-later
"""Collector session aggregator emits ``attacker.session.ended``.
Pins the producer-side fix that closes the gap surfaced in TTP
debugging: the TTP worker subscribes to ``attacker.session.ended`` but
no upstream component published it. The aggregator indexes shell
``command`` events per attacker_ip and emits one envelope per
``session_recorded`` event with the commands that fall inside the
session window.
"""
from __future__ import annotations
from typing import Any
import pytest
from decnet.bus import topics as _topics
from decnet.collector.worker import _SessionAggregator
_ATTACKER_IP = "192.168.1.5"
def _cmd(ts_iso: str, text: str) -> dict[str, Any]:
return {
"timestamp": ts_iso,
"decky": "SRV-DELTA-77",
"service": "bash",
"event_type": "command",
"attacker_ip": _ATTACKER_IP,
"fields": {"command": text, "src": _ATTACKER_IP},
}
def _raw_cmd(ts_iso: str, msg: str) -> dict[str, Any]:
"""Parsed event whose bash CMD body is in ``msg``, fields={}.
Mirrors what the unmodified collector parser produces for
PROMPT_COMMAND lines (the parser deliberately keeps fields empty
so the frontend pill rendering doesn't double-up). The aggregator
now extracts uid/user/src/pwd/command from that msg body.
"""
return {
"timestamp": ts_iso,
"decky": "SRV-DELTA-77",
"service": "bash",
"event_type": "command",
"attacker_ip": _ATTACKER_IP,
"fields": {},
"msg": msg,
}
def _session_recorded(
ts_iso: str, sid: str, duration_s: float = 60.0,
) -> dict[str, Any]:
return {
"timestamp": ts_iso,
"decky": "omega-decky",
"service": "sessrec",
"event_type": "session_recorded",
"attacker_ip": _ATTACKER_IP,
"fields": {
"sid": sid,
"service": "ssh",
"duration_s": str(duration_s),
"src_ip": _ATTACKER_IP,
},
}
@pytest.fixture
def captured_publishes() -> list[tuple[str, dict[str, Any], str]]:
return []
@pytest.fixture
def aggregator(
captured_publishes: list[tuple[str, dict[str, Any], str]],
) -> _SessionAggregator:
def _publish(topic: str, payload: dict[str, Any], event_type: str) -> None:
captured_publishes.append((topic, payload, event_type))
return _SessionAggregator(_publish, ttl_sec=3600.0)
# ── Indexing ────────────────────────────────────────────────────────
def test_command_events_are_indexed_per_attacker_ip(
aggregator: _SessionAggregator,
) -> None:
aggregator.add_event(_cmd("2026-05-02T06:22:48", "whoami"))
aggregator.add_event(_cmd("2026-05-02T06:22:50", "id"))
assert len(aggregator._cmds[_ATTACKER_IP]) == 2
def test_unknown_attacker_ip_is_ignored(
aggregator: _SessionAggregator,
captured_publishes: list[tuple[str, dict[str, Any], str]],
) -> None:
bad = _cmd("2026-05-02T06:22:48", "whoami")
bad["attacker_ip"] = "Unknown"
aggregator.add_event(bad)
assert aggregator._cmds == {}
def test_unparseable_timestamp_is_skipped(
aggregator: _SessionAggregator,
) -> None:
bad = _cmd("not-a-timestamp", "whoami")
aggregator.add_event(bad)
assert aggregator._cmds == {}
# ── Session emission ────────────────────────────────────────────────
def test_session_recorded_emits_attacker_session_ended(
aggregator: _SessionAggregator,
captured_publishes: list[tuple[str, dict[str, Any], str]],
) -> None:
aggregator.add_event(_cmd("2026-05-02T06:22:48", "whoami"))
aggregator.add_event(_cmd("2026-05-02T06:23:00", "id"))
aggregator.add_event(_cmd("2026-05-02T06:23:10", "uname -a"))
aggregator.add_event(_session_recorded(
"2026-05-02T06:23:30", sid="sess-123", duration_s=120.0,
))
assert len(captured_publishes) == 1
topic, payload, event_type = captured_publishes[0]
assert topic == _topics.attacker(_topics.ATTACKER_SESSION_ENDED)
assert event_type == _topics.ATTACKER_SESSION_ENDED
assert payload["session_id"] == "sess-123"
assert payload["attacker_ip"] == _ATTACKER_IP
assert payload["decky_id"] == "omega-decky"
assert payload["service"] == "ssh"
assert payload["duration_s"] == 120.0
cmds = payload["commands"]
assert [c["command_text"] for c in cmds] == ["whoami", "id", "uname -a"]
assert [c["id"] for c in cmds] == [
"sess-123#0", "sess-123#1", "sess-123#2",
]
def test_commands_outside_session_window_are_excluded(
aggregator: _SessionAggregator,
captured_publishes: list[tuple[str, dict[str, Any], str]],
) -> None:
"""duration_s window is [ended_at - duration_s, ended_at]."""
# Old command from 10 minutes before the session — out of window
# for a 60-second session.
aggregator.add_event(_cmd("2026-05-02T06:13:00", "older-than-window"))
# In-window
aggregator.add_event(_cmd("2026-05-02T06:22:50", "whoami"))
aggregator.add_event(_session_recorded(
"2026-05-02T06:23:00", sid="s1", duration_s=60.0,
))
payload = captured_publishes[0][1]
cmd_texts = [c["command_text"] for c in payload["commands"]]
assert "whoami" in cmd_texts
assert "older-than-window" not in cmd_texts
def test_back_to_back_sessions_emit_distinct_envelopes(
aggregator: _SessionAggregator,
captured_publishes: list[tuple[str, dict[str, Any], str]],
) -> None:
aggregator.add_event(_cmd("2026-05-02T06:22:50", "whoami"))
aggregator.add_event(_session_recorded(
"2026-05-02T06:23:00", sid="s1", duration_s=60.0,
))
aggregator.add_event(_cmd("2026-05-02T06:30:00", "ls"))
aggregator.add_event(_session_recorded(
"2026-05-02T06:30:30", sid="s2", duration_s=60.0,
))
assert len(captured_publishes) == 2
s1, s2 = captured_publishes[0][1], captured_publishes[1][1]
assert s1["session_id"] == "s1"
assert s2["session_id"] == "s2"
# The second session window is 60s — only "ls" lands in it.
assert [c["command_text"] for c in s2["commands"]] == ["ls"]
def test_session_without_sid_falls_back_to_synthetic_id(
aggregator: _SessionAggregator,
captured_publishes: list[tuple[str, dict[str, Any], str]],
) -> None:
aggregator.add_event(_cmd("2026-05-02T06:22:50", "whoami"))
sr = _session_recorded("2026-05-02T06:23:00", sid="", duration_s=60.0)
sr["fields"]["sid"] = ""
aggregator.add_event(sr)
payload = captured_publishes[0][1]
assert payload["session_id"] is None
cmd_id = payload["commands"][0]["id"]
assert cmd_id.startswith(f"{_ATTACKER_IP}-2026-05-02T06:22:50")
# ── TTL eviction ────────────────────────────────────────────────────
def test_ttl_eviction_drops_old_commands() -> None:
publishes: list[tuple[str, dict[str, Any], str]] = []
def _publish(topic: str, payload: dict[str, Any], event_type: str) -> None:
publishes.append((topic, payload, event_type))
agg = _SessionAggregator(_publish, ttl_sec=10.0)
agg.add_event(_cmd("2026-05-02T06:00:00", "old"))
# New command 30 seconds later — TTL=10s, so the old one evicts.
agg.add_event(_cmd("2026-05-02T06:00:30", "fresh"))
remaining = [
p.get("fields", {}).get("command")
for _, p in agg._cmds[_ATTACKER_IP]
]
assert remaining == ["fresh"]
def test_session_emits_structured_uid_user_src_pwd_when_msg_carries_them(
aggregator: _SessionAggregator,
captured_publishes: list[tuple[str, dict[str, Any], str]],
) -> None:
"""The bash PROMPT_COMMAND msg body splits into structured fields.
Pins the "inspector wants UID/SRC/PWD/CMD on separate rows"
contract. Without this the inspector sees one big
``CMD uid=0 user=root src=… cmd=…`` string and operators have to
eyeball the cmd= portion out of the prefix garbage.
"""
aggregator.add_event(_raw_cmd(
"2026-05-02T06:22:48",
"CMD uid=0 user=root src=192.168.1.5 pwd=/root "
"cmd=nmap -p- 192.168.1.0/24",
))
aggregator.add_event(_session_recorded(
"2026-05-02T06:23:00", sid="sess-x", duration_s=120.0,
))
payload = captured_publishes[0][1]
cmd = payload["commands"][0]
assert cmd["uid"] == "0"
assert cmd["user"] == "root"
assert cmd["src"] == "192.168.1.5"
assert cmd["pwd"] == "/root"
# ``command_text`` is the cmd= value, NOT the full "CMD uid=…" line.
# nmap's command line carries spaces — we must preserve them.
assert cmd["command_text"] == "nmap -p- 192.168.1.0/24"
def test_publish_failure_is_swallowed() -> None:
"""A blowing-up publish must not propagate into the stream thread."""
def _bad(_t: str, _p: dict[str, Any], _e: str) -> None:
raise RuntimeError("bus exploded")
agg = _SessionAggregator(_bad, ttl_sec=3600.0)
agg.add_event(_cmd("2026-05-02T06:22:50", "whoami"))
# Should NOT raise.
agg.add_event(_session_recorded("2026-05-02T06:23:00", sid="s1"))
# ── shard_path enrichment (W.1) ─────────────────────────────────────
def test_session_ended_payload_carries_shard_path_when_shard_exists(
aggregator: _SessionAggregator,
captured_publishes: list[tuple[str, dict[str, Any], str]],
tmp_path,
monkeypatch,
) -> None:
"""When find_shard_with_sid resolves, the payload carries the path."""
import json
from decnet.artifacts import shards
sid = "11111111-2222-3333-4444-555555555555"
shard_dir = tmp_path / "omega-decky" / "ssh" / "transcripts"
shard_dir.mkdir(parents=True)
shard = shard_dir / "sessions-2026-05-02.jsonl"
shard.write_text(json.dumps({"sid": sid, "hdr": {}}) + "\n")
monkeypatch.setattr(shards, "ARTIFACTS_ROOT", tmp_path)
shards._INDEX_CACHE.clear()
aggregator.add_event(_cmd("2026-05-02T06:22:48", "whoami"))
aggregator.add_event(_session_recorded(
"2026-05-02T06:23:00", sid=sid, duration_s=120.0,
))
payload = captured_publishes[0][1]
assert payload["shard_path"] == str(shard.resolve())
def test_session_ended_payload_shard_path_none_when_unresolvable(
aggregator: _SessionAggregator,
captured_publishes: list[tuple[str, dict[str, Any], str]],
tmp_path,
monkeypatch,
) -> None:
"""No shard on disk → shard_path is None (consumer skips honestly)."""
from decnet.artifacts import shards
monkeypatch.setattr(shards, "ARTIFACTS_ROOT", tmp_path)
shards._INDEX_CACHE.clear()
aggregator.add_event(_cmd("2026-05-02T06:22:48", "whoami"))
aggregator.add_event(_session_recorded(
"2026-05-02T06:23:00", sid="ffffffff-eeee-dddd-cccc-bbbbbbbbbbbb",
))
payload = captured_publishes[0][1]
assert payload["shard_path"] is None