Files
DECNET/tests/profiler/behave_shell/test_handler_session_ended.py
anti 5ff89eefe7 feat(profiler): wire BEHAVE-SHELL extraction onto attacker.session.ended
The profiler worker now consumes attacker.session.ended on the bus
AND walks unprofiled session_recorded log rows on every tick. Both
paths converge on a single handler that:

1. Validates required payload fields (session_id, decky_id, service,
   attacker_ip, shard_path).
2. Builds evidence_ref shard:{decky}/{service}/{shard_basename}#{sid}
   and skips when has_observations_for_evidence is True (idempotent
   re-runs).
3. Resolves attacker_uuid via get_attacker_uuid_by_ip; defers if the
   profiler tick hasn't materialised the row yet.
4. Reads the asciinema shard, slices events for the sid, calls
   extract_session, persists each Observation via upsert_observation
   (per-row; batch transaction filed as follow-up), then publishes
   each on the bus best-effort (fire-and-forget per DEBT-029 §6).

Architecture:
* Handler lives in decnet/profiler/behave_shell/_handler.py — pure
  function, unit-tested in isolation.
* Worker.py adds _behave_pump (queue feed), _drain_behave_queue
  (per-tick drain), _behave_poll_tick (cursor scan over
  session_recorded logs), and _payload_from_log_row (Log → bus-shape
  payload projection).
* Poll cursor uses a separate state key
  (attacker_worker_session_cursor) so the correlation tick's cursor
  doesn't conflate.
* has_observations_for_evidence promoted to BaseRepository abstract.

22 new tests across handler / drain / poll layers covering happy
path, all skip paths, isolation against handler exceptions,
idempotency on re-run, and cursor key separation. TTP worker bus
tests still green — payload field is purely additive.

Closes BEHAVE-INTEGRATION.md Phase 4.
2026-05-08 18:57:45 -04:00

178 lines
6.3 KiB
Python

"""Unit tests for ``decnet.profiler.behave_shell._handler``.
Direct exercise of ``handle_session_ended()`` without the worker loop
or a real bus. The handler is the load-bearing piece — bus / poll
fallback paths in the worker just feed it. Pin the contract here.
"""
from __future__ import annotations
import json
from typing import Any
from unittest.mock import AsyncMock
import pytest
from decnet.profiler.behave_shell._handler import (
_build_evidence_ref,
handle_session_ended,
)
_SID = "11111111-2222-3333-4444-555555555555"
_DECKY = "test-decky"
_SERVICE = "ssh"
_IP = "10.0.0.5"
_ATTACKER_UUID = "att-uuid-abc"
def _write_shard(tmp_path, sid: str, lines: list[dict]) -> str:
"""Write a synthetic asciinema shard JSONL and return its path."""
shard_dir = tmp_path / _DECKY / _SERVICE / "transcripts"
shard_dir.mkdir(parents=True, exist_ok=True)
shard = shard_dir / "sessions-2026-05-08.jsonl"
with shard.open("w") as f:
for line in lines:
f.write(json.dumps(line) + "\n")
return str(shard)
def _shard_with_typing_session(tmp_path, sid: str = _SID) -> str:
"""A minimal session with enough events to fire the calibration floor."""
lines = [{"sid": sid, "hdr": {"version": 2, "width": 80, "height": 24,
"timestamp": 1714521600}}]
text = "ls\rps\rid\rwhoami\rpwd\runame\r"
for i, c in enumerate(text):
lines.append({"sid": sid, "t": i * 0.05, "ch": "i", "d": c})
lines.append({"sid": sid, "t": 5.0, "ch": "o", "d": "anti@host:~$ "})
return _write_shard(tmp_path, sid, lines)
def _payload(shard_path: str | None) -> dict[str, Any]:
return {
"session_id": _SID,
"attacker_uuid": None,
"attacker_ip": _IP,
"decky_id": _DECKY,
"service": _SERVICE,
"ended_at": "2026-05-08T10:00:00",
"duration_s": 5.0,
"commands": [],
"shard_path": shard_path,
}
def _make_repo(*, has_evidence: bool = False, attacker_uuid: str | None = _ATTACKER_UUID):
repo = AsyncMock()
repo.has_observations_for_evidence = AsyncMock(return_value=has_evidence)
repo.get_attacker_uuid_by_ip = AsyncMock(return_value=attacker_uuid)
repo.upsert_observation = AsyncMock(return_value="row-uuid")
return repo
def test_evidence_ref_shape() -> None:
ref = _build_evidence_ref(
"deck", "ssh", "/var/lib/decnet/artifacts/deck/ssh/transcripts/sessions-2026-05-08.jsonl",
"abc",
)
assert ref == "shard:deck/ssh/sessions-2026-05-08.jsonl#abc"
async def test_happy_path_persists_and_publishes(tmp_path) -> None:
shard_path = _shard_with_typing_session(tmp_path)
repo = _make_repo()
published: list[tuple[str, dict[str, Any], str]] = []
publish = lambda topic, payload, etype: published.append((topic, payload, etype))
n = await handle_session_ended(repo, _payload(shard_path), publish)
assert n > 0
assert repo.upsert_observation.await_count == n
# Every persistence row must include the attacker_uuid denorm.
for call in repo.upsert_observation.await_args_list:
row = call.args[0]
assert row["attacker_uuid"] == _ATTACKER_UUID
assert row["evidence_ref"].startswith("shard:")
# Bus published once per observation.
assert len(published) == n
for topic, payload, etype in published:
assert topic.startswith("attacker.observation.")
# Adapter excludes id/ts/v from payload body; handler re-merges.
assert "id" in payload and "ts" in payload and "v" in payload
async def test_missing_session_id_skipped(tmp_path) -> None:
shard_path = _shard_with_typing_session(tmp_path)
p = _payload(shard_path)
p["session_id"] = None
repo = _make_repo()
n = await handle_session_ended(repo, p, None)
assert n == 0
repo.upsert_observation.assert_not_awaited()
async def test_missing_shard_path_skipped(tmp_path) -> None:
repo = _make_repo()
n = await handle_session_ended(repo, _payload(None), None)
assert n == 0
repo.has_observations_for_evidence.assert_not_awaited()
async def test_already_profiled_skipped(tmp_path) -> None:
"""Idempotency: handler returns 0 if has_observations_for_evidence True."""
shard_path = _shard_with_typing_session(tmp_path)
repo = _make_repo(has_evidence=True)
n = await handle_session_ended(repo, _payload(shard_path), None)
assert n == 0
repo.get_attacker_uuid_by_ip.assert_not_awaited()
repo.upsert_observation.assert_not_awaited()
async def test_attacker_uuid_unresolved_defers(tmp_path) -> None:
"""Cold IP — no Attacker row yet. Skip and let the next tick retry."""
shard_path = _shard_with_typing_session(tmp_path)
repo = _make_repo(attacker_uuid=None)
n = await handle_session_ended(repo, _payload(shard_path), None)
assert n == 0
repo.upsert_observation.assert_not_awaited()
async def test_shard_missing_on_disk_defers(tmp_path) -> None:
"""shard_path points at a file that hasn't been flushed yet."""
fake_path = str(tmp_path / "nope" / "sessions-2026-05-08.jsonl")
repo = _make_repo()
n = await handle_session_ended(repo, _payload(fake_path), None)
assert n == 0
repo.upsert_observation.assert_not_awaited()
async def test_sid_not_in_shard_skipped(tmp_path) -> None:
"""Shard exists but doesn't contain our sid."""
other_sid = "ffffffff-eeee-dddd-cccc-bbbbbbbbbbbb"
shard_path = _shard_with_typing_session(tmp_path, sid=other_sid)
repo = _make_repo()
n = await handle_session_ended(repo, _payload(shard_path), None)
assert n == 0
repo.upsert_observation.assert_not_awaited()
async def test_publish_failure_does_not_raise(tmp_path) -> None:
"""Bus publish failures are best-effort; persistence already
succeeded so we don't roll back."""
shard_path = _shard_with_typing_session(tmp_path)
repo = _make_repo()
def _bad(*_a: Any, **_k: Any) -> None:
raise RuntimeError("bus exploded")
n = await handle_session_ended(repo, _payload(shard_path), _bad)
assert n > 0
assert repo.upsert_observation.await_count == n
async def test_publish_none_is_silent(tmp_path) -> None:
"""publish=None is the no-op path used in poll-fallback mode."""
shard_path = _shard_with_typing_session(tmp_path)
repo = _make_repo()
n = await handle_session_ended(repo, _payload(shard_path), None)
assert n > 0