Files
DECNET/tests/profiler/behave_shell/test_parse_and_bursts.py
anti c9a81a23c2 feat(profiler/behave_shell): asciinema parser + paste-burst detection
BEHAVE-EXTRACTOR.md Phase A Step 1. Lays the shared primitives that
Steps 2-3 (motor.input_modality, motor.paste_burst_rate) will
consume:

* parse_shard_line / parse_shard turn a shard JSONL line/file into
  AsciinemaEvents, skipping headers and malformed records.
* PasteBurst dataclass + _detect_paste_bursts group consecutive
  paste-class input events (len(d) >= 4 chars per the prototype's
  empirical floor) into contiguous bursts, splitting on IAT gaps
  larger than PASTE_BURST_MAX_IAT_S (200ms).
* SessionContext now carries iats and paste_bursts derivations.
* Threshold constants harvested from
  BEHAVE/prototype_extractors/shell/extract.py — calibrated against
  the five 2026-05-02 shards.

Tests cover pure-typed, pure-pasted, mixed streams; close vs far
paste events; typed events breaking a burst; PasteBurst immutability;
and the JSON parser's junk handling.
2026-05-03 07:46:01 -04:00

122 lines
4.3 KiB
Python

"""Step 1: shard parsing + paste-burst detection.
Synthetic streams covering pure-typed, pure-pasted, and mixed input,
plus the JSONL shard line parser and its junk handling.
"""
from __future__ import annotations
import json
from decnet.profiler.behave_shell._ctx import build_session_context
from decnet.profiler.behave_shell._parse import (
PasteBurst,
parse_shard,
parse_shard_line,
)
# ── parse_shard_line ───────────────────────────────────────────────────────
def test_parse_shard_line_event() -> None:
line = json.dumps({"sid": "x", "t": 1.5, "ch": "i", "d": "ls"})
assert parse_shard_line(line) == (1.5, "i", "ls")
def test_parse_shard_line_skips_header() -> None:
line = json.dumps({"sid": "x", "hdr": {"version": 2}})
assert parse_shard_line(line) is None
def test_parse_shard_line_handles_blank_and_garbage() -> None:
assert parse_shard_line("") is None
assert parse_shard_line(" ") is None
assert parse_shard_line("not json") is None
assert parse_shard_line('{"t": "string-not-number", "ch": "i", "d": "x"}') is None
assert parse_shard_line('{"t": 1.0, "ch": "z", "d": "x"}') is None # bad kind
assert parse_shard_line('"just a string"') is None
def test_parse_shard_skips_junk_in_stream() -> None:
lines = [
json.dumps({"sid": "x", "hdr": {"version": 2}}),
"",
"garbage",
json.dumps({"sid": "x", "t": 0.1, "ch": "i", "d": "a"}),
json.dumps({"sid": "x", "t": 0.2, "ch": "o", "d": "a\r\n"}),
]
out = list(parse_shard(lines))
assert out == [(0.1, "i", "a"), (0.2, "o", "a\r\n")]
# ── paste-burst detection ──────────────────────────────────────────────────
def test_pure_typed_stream_has_no_paste_bursts() -> None:
# Single-char input events spaced by 0.1s: pure typing.
events = [(i * 0.1, "i", c) for i, c in enumerate("hello world\r")]
ctx = build_session_context(events, sid="t-typed", source="test")
assert ctx.paste_bursts == ()
assert ctx.paste_event_count == 0
# IATs computed correctly
assert len(ctx.iats) == len(events) - 1
for iat in ctx.iats:
assert abs(iat - 0.1) < 1e-9
def test_single_paste_event_one_burst() -> None:
events = [
(0.0, "i", "echo hello world\r"), # 17 chars — paste class
]
ctx = build_session_context(events, sid="t-paste-1", source="test")
assert len(ctx.paste_bursts) == 1
assert ctx.paste_event_count == 1
burst = ctx.paste_bursts[0]
assert burst.start_ts == 0.0
assert burst.end_ts == 0.0
assert burst.char_count == 17
assert burst.event_count == 1
def test_two_close_paste_events_collapse_into_one_burst() -> None:
events = [
(0.0, "i", "first paste line\r"),
(0.05, "i", "second paste line\r"), # within PASTE_BURST_MAX_IAT_S
]
ctx = build_session_context(events, sid="t-paste-2", source="test")
assert len(ctx.paste_bursts) == 1
assert ctx.paste_event_count == 2
assert ctx.paste_bursts[0].event_count == 2
assert ctx.paste_bursts[0].char_count == 17 + 18
def test_two_far_paste_events_split_into_two_bursts() -> None:
events = [
(0.0, "i", "first paste\r"),
(5.0, "i", "second paste\r"), # well past the IAT cap
]
ctx = build_session_context(events, sid="t-paste-far", source="test")
assert len(ctx.paste_bursts) == 2
assert ctx.paste_event_count == 2
def test_typing_between_pastes_breaks_the_burst() -> None:
events: list = [
(0.0, "i", "long pasted line\r"),
(0.5, "i", "x"), # single typed char interrupts
(0.6, "i", "another pasted line\r"),
]
ctx = build_session_context(events, sid="t-mixed", source="test")
assert len(ctx.paste_bursts) == 2
assert ctx.paste_event_count == 2
# The "x" event is not a paste-class event
assert ctx.input_events[1][2] == "x"
def test_paste_burst_record_is_immutable() -> None:
b = PasteBurst(start_ts=0.0, end_ts=1.0, char_count=10, event_count=1)
try:
b.char_count = 99 # type: ignore[misc]
except Exception:
pass
else:
raise AssertionError("PasteBurst should be frozen")