Files
DECNET/decnet/profiler/behave_shell/_ctx.py
anti c9a81a23c2 feat(profiler/behave_shell): asciinema parser + paste-burst detection
BEHAVE-EXTRACTOR.md Phase A Step 1. Lays the shared primitives that
Steps 2-3 (motor.input_modality, motor.paste_burst_rate) will
consume:

* parse_shard_line / parse_shard turn a shard JSONL line/file into
  AsciinemaEvents, skipping headers and malformed records.
* PasteBurst dataclass + _detect_paste_bursts group consecutive
  paste-class input events (len(d) >= 4 chars per the prototype's
  empirical floor) into contiguous bursts, splitting on IAT gaps
  larger than PASTE_BURST_MAX_IAT_S (200ms).
* SessionContext now carries iats and paste_bursts derivations.
* Threshold constants harvested from
  BEHAVE/prototype_extractors/shell/extract.py — calibrated against
  the five 2026-05-02 shards.

Tests cover pure-typed, pure-pasted, mixed streams; close vs far
paste events; typed events breaking a burst; PasteBurst immutability;
and the JSON parser's junk handling.
2026-05-03 07:46:01 -04:00

144 lines
4.2 KiB
Python

"""SessionContext: precomputed bundle every feature function reads from.
A naïve engine re-walks the event stream once per primitive. We don't
do that — one walk over the events builds this context, every feature
reads from it. Adding a new feature is O(1) cost on the parse side.
Step 1 fills ``iats`` (inter-key intervals between input events) and
``paste_bursts`` (contiguous runs of paste-class events). Step 4
will fill ``commands`` / ``inter_cmd_iats`` / ``output_per_cmd``.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Iterable
from decnet.profiler.behave_shell._parse import AsciinemaEvent, PasteBurst
from decnet.profiler.behave_shell._thresholds import (
PASTE_BURST_MAX_IAT_S,
PASTE_MIN_CHARS_PER_EVENT,
)
@dataclass(frozen=True, slots=True)
class SessionContext:
sid: str
source: str
evidence_ref: str
t_start: float
t_end: float
duration_s: float
input_events: tuple[AsciinemaEvent, ...] = field(default_factory=tuple)
output_events: tuple[AsciinemaEvent, ...] = field(default_factory=tuple)
# Step 1 derivations
iats: tuple[float, ...] = field(default_factory=tuple)
paste_bursts: tuple[PasteBurst, ...] = field(default_factory=tuple)
paste_event_count: int = 0
def _detect_paste_bursts(
inputs: list[AsciinemaEvent],
) -> tuple[tuple[PasteBurst, ...], int]:
"""Group consecutive paste-class input events into PasteBursts.
A paste-class event is one with ``len(data) >= PASTE_MIN_CHARS_PER_EVENT``.
Two adjacent paste-class events collapse into the same burst when
their IAT is within ``PASTE_BURST_MAX_IAT_S``; otherwise a new
burst opens. Returns the bursts and the total count of paste-class
events (the same number ``BEHAVE`` prototype calls ``paste_events``).
"""
bursts: list[PasteBurst] = []
paste_count = 0
cur_start: float | None = None
cur_end: float = 0.0
cur_chars: int = 0
cur_events: int = 0
last_t: float | None = None
def _close() -> None:
nonlocal cur_start, cur_end, cur_chars, cur_events
if cur_start is not None and cur_events > 0:
bursts.append(PasteBurst(
start_ts=cur_start,
end_ts=cur_end,
char_count=cur_chars,
event_count=cur_events,
))
cur_start = None
cur_end = 0.0
cur_chars = 0
cur_events = 0
for t, _kind, data in inputs:
is_paste = len(data) >= PASTE_MIN_CHARS_PER_EVENT
if is_paste:
paste_count += 1
if cur_start is None or (
last_t is not None and (t - last_t) > PASTE_BURST_MAX_IAT_S
):
_close()
cur_start = t
cur_end = t
cur_chars += len(data)
cur_events += 1
else:
_close()
last_t = t
_close()
return tuple(bursts), paste_count
def build_session_context(
events: Iterable[AsciinemaEvent],
*,
sid: str,
source: str,
evidence_ref: str | None = None,
) -> SessionContext:
"""Single-pass build of the SessionContext for ``events``."""
inputs: list[AsciinemaEvent] = []
outputs: list[AsciinemaEvent] = []
t_first: float | None = None
t_last: float = 0.0
for ev in events:
t, kind, _ = ev
if t_first is None:
t_first = t
if t > t_last:
t_last = t
if kind == "i":
inputs.append(ev)
elif kind == "o":
outputs.append(ev)
if t_first is None:
t_start = 0.0
t_end = 0.0
else:
t_start = t_first
t_end = t_last
iats: tuple[float, ...] = tuple(
max(0.0, inputs[i][0] - inputs[i - 1][0]) for i in range(1, len(inputs))
)
paste_bursts, paste_count = _detect_paste_bursts(inputs)
return SessionContext(
sid=sid,
source=source,
evidence_ref=evidence_ref or f"session:{sid}",
t_start=t_start,
t_end=t_end,
duration_s=max(0.0, t_end - t_start),
input_events=tuple(inputs),
output_events=tuple(outputs),
iats=iats,
paste_bursts=paste_bursts,
paste_event_count=paste_count,
)