From f8eae04e5d7a89044e0e8972670b427add9f7a65 Mon Sep 17 00:00:00 2001
From: anti
Date: Sun, 3 May 2026 07:42:09 -0400
Subject: [PATCH] feat(profiler/behave_shell): scaffold extract_session entry point
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BEHAVE-EXTRACTOR.md Phase A Step 0. Lays the package skeleton
(__init__/extract/_parse/_ctx/_thresholds/_features) with empty
FEATURES = (), so the worker plumbing in BEHAVE-INTEGRATION Phase 4 has
a stable import path before any primitive lands. extract_session()
builds a SessionContext once and fans the registered feature functions
across it; at Step 0 that fan-out is empty and the function yields
nothing. Step 1 (asciinema parser + paste-burst detector) and Step 2
(motor.input_modality) land next. Smoke suite asserts the empty
contract: empty stream → no observations, single event → t_start ==
t_end, multi-event → events routed into input_events / output_events by
kind, evidence_ref defaults to "session:<sid>" or honours an explicit
override.
---
 decnet/profiler/behave_shell/__init__.py      |  18 ++++
 decnet/profiler/behave_shell/_ctx.py          |  78 +++++++++++++++
 .../behave_shell/_features/__init__.py        |  20 ++++
 decnet/profiler/behave_shell/_parse.py        |  14 +++
 decnet/profiler/behave_shell/_thresholds.py   |  11 +++
 decnet/profiler/behave_shell/extract.py       |  51 ++++++++++
 tests/profiler/behave_shell/__init__.py       |   0
 .../behave_shell/test_extract_smoke.py        | 102 ++++++++++++++++++++
 8 files changed, 294 insertions(+)
 create mode 100644 decnet/profiler/behave_shell/__init__.py
 create mode 100644 decnet/profiler/behave_shell/_ctx.py
 create mode 100644 decnet/profiler/behave_shell/_features/__init__.py
 create mode 100644 decnet/profiler/behave_shell/_parse.py
 create mode 100644 decnet/profiler/behave_shell/_thresholds.py
 create mode 100644 decnet/profiler/behave_shell/extract.py
 create mode 100644 tests/profiler/behave_shell/__init__.py
 create mode 100644 tests/profiler/behave_shell/test_extract_smoke.py

diff --git a/decnet/profiler/behave_shell/__init__.py b/decnet/profiler/behave_shell/__init__.py
new file mode 100644
index 00000000..d8a2988b
--- /dev/null
+++ b/decnet/profiler/behave_shell/__init__.py
@@ -0,0 +1,18 @@
+"""BEHAVE-SHELL extraction engine — DECNET's official implementation.
+
+Per ``development/BEHAVE-EXTRACTOR.md``: this package is a pure
+library. Workers (``BEHAVE-INTEGRATION.md`` Phase 4) own I/O, bus
+emission, and persistence. The engine just turns one PTY session into
+``Iterable[Observation]``.
+
+BEHAVE is the spec; DECNET is the engine.
+"""
+from __future__ import annotations
+
+from decnet.profiler.behave_shell.extract import (
+    DEFAULT_SOURCE,
+    build_context,
+    extract_session,
+)
+
+__all__ = ["DEFAULT_SOURCE", "build_context", "extract_session"]
diff --git a/decnet/profiler/behave_shell/_ctx.py b/decnet/profiler/behave_shell/_ctx.py
new file mode 100644
index 00000000..bb77bc56
--- /dev/null
+++ b/decnet/profiler/behave_shell/_ctx.py
@@ -0,0 +1,78 @@
+"""SessionContext: precomputed bundle every feature function reads from.
+
+A naïve engine re-walks the event stream once per primitive. We don't
+do that — one walk over the events builds this context, every feature
+reads from it. Adding a new feature is O(1) cost on the parse side.
+
+Step 0 ships only the structural fields (sid / source / evidence_ref /
+timing envelope). Step 1+ fills ``iats`` / ``paste_bursts`` /
+``commands`` / ``inter_cmd_iats`` / ``output_per_cmd``.
+"""
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Iterable
+
+from decnet.profiler.behave_shell._parse import AsciinemaEvent
+
+
+@dataclass(frozen=True, slots=True)
+class SessionContext:
+    sid: str
+    source: str
+    evidence_ref: str
+    t_start: float
+    t_end: float
+    duration_s: float
+
+    input_events: tuple[AsciinemaEvent, ...] = field(default_factory=tuple)
+    output_events: tuple[AsciinemaEvent, ...] = field(default_factory=tuple)
+
+
+def build_session_context(
+    events: Iterable[AsciinemaEvent],
+    *,
+    sid: str,
+    source: str,
+    evidence_ref: str | None = None,
+) -> SessionContext:
+    """Single-pass build of the SessionContext for ``events``.
+
+    ``evidence_ref`` defaults to ``"session:" + sid`` so callers that
+    don't yet plumb a real evidence pointer still get a stable,
+    BEHAVE-envelope-valid string. Workers should pass an explicit
+    pointer to the on-disk shard.
+    """
+    inputs: list[AsciinemaEvent] = []
+    outputs: list[AsciinemaEvent] = []
+    t_first: float | None = None
+    t_last: float | None = None
+
+    for ev in events:
+        t, kind, _ = ev
+        if t_first is None or t < t_first:  # earliest seen, not first seen
+            t_first = t
+        if t_last is None or t > t_last:  # latest seen; no implicit 0.0 floor
+            t_last = t
+        if kind == "i":
+            inputs.append(ev)
+        elif kind == "o":
+            outputs.append(ev)
+
+    if t_first is None or t_last is None:  # empty stream: zero envelope
+        t_start = 0.0
+        t_end = 0.0
+    else:
+        t_start = t_first
+        t_end = t_last
+
+    return SessionContext(
+        sid=sid,
+        source=source,
+        evidence_ref=evidence_ref or f"session:{sid}",
+        t_start=t_start,
+        t_end=t_end,
+        duration_s=max(0.0, t_end - t_start),
+        input_events=tuple(inputs),
+        output_events=tuple(outputs),
+    )
diff --git a/decnet/profiler/behave_shell/_features/__init__.py b/decnet/profiler/behave_shell/_features/__init__.py
new file mode 100644
index 00000000..768a7fc6
--- /dev/null
+++ b/decnet/profiler/behave_shell/_features/__init__.py
@@ -0,0 +1,20 @@
+"""Registered feature functions.
+
+Each entry takes a ``SessionContext`` and yields zero or more
+``Observation`` instances. Adding a primitive = adding a function in a
+sibling module and appending it to ``FEATURES``.
+
+Step 0 ships an empty tuple — extract_session() is wired but emits
+nothing until Step 2.
+"""
+from __future__ import annotations
+
+from typing import Callable, Iterable
+
+from decnet_behave_core.spec.envelope import Observation
+
+from decnet.profiler.behave_shell._ctx import SessionContext
+
+FeatureFn = Callable[[SessionContext], Iterable[Observation]]
+
+FEATURES: tuple[FeatureFn, ...] = ()
diff --git a/decnet/profiler/behave_shell/_parse.py b/decnet/profiler/behave_shell/_parse.py
new file mode 100644
index 00000000..831f4204
--- /dev/null
+++ b/decnet/profiler/behave_shell/_parse.py
@@ -0,0 +1,14 @@
+"""Asciinema event types.
+
+The on-disk shard format is a list of 3-tuples ``(t, kind, data)`` where
+``t`` is seconds since session start (float), ``kind`` is ``'i'`` (input)
+or ``'o'`` (output), and ``data`` is the captured bytes decoded as a
+Python ``str``. Step 0 ships only the type aliases — Step 1 fills the
+parsing helpers and paste-burst detector.
+"""
+from __future__ import annotations
+
+from typing import Literal, Tuple
+
+EventKind = Literal["i", "o"]
+AsciinemaEvent = Tuple[float, EventKind, str]
diff --git a/decnet/profiler/behave_shell/_thresholds.py b/decnet/profiler/behave_shell/_thresholds.py
new file mode 100644
index 00000000..c857ad0e
--- /dev/null
+++ b/decnet/profiler/behave_shell/_thresholds.py
@@ -0,0 +1,11 @@
+"""Numeric thresholds for BEHAVE-SHELL primitive classification.
+
+Each constant added here cites its calibration source. When the
+registry's ``notes:`` field disagrees with a constant in this file the
+registry is authoritative — fix the constant and re-run the
+calibration grid.
+
+Step 0 ships this file empty by design; thresholds land alongside the
+feature functions that consume them (Steps 1+).
+"""
+from __future__ import annotations
diff --git a/decnet/profiler/behave_shell/extract.py b/decnet/profiler/behave_shell/extract.py
new file mode 100644
index 00000000..c02fa6ee
--- /dev/null
+++ b/decnet/profiler/behave_shell/extract.py
@@ -0,0 +1,51 @@
+"""Public extraction entry point.
+
+``extract_session`` is the only function workers call. It builds a
+:class:`SessionContext` once and fans the registered feature functions
+across it. Pure library: no I/O, no bus, no DB. The worker
+(``BEHAVE-INTEGRATION.md`` Phase 4) is responsible for those.
+"""
+from __future__ import annotations
+
+from typing import Iterable, Iterator
+
+from decnet_behave_core.spec.envelope import Observation
+
+from decnet.profiler.behave_shell._ctx import SessionContext, build_session_context
+from decnet.profiler.behave_shell._features import FEATURES
+from decnet.profiler.behave_shell._parse import AsciinemaEvent
+
+DEFAULT_SOURCE = "decnet/profiler/behave_shell/extract.py"
+
+
+def extract_session(
+    events: Iterable[AsciinemaEvent],
+    *,
+    sid: str,
+    source: str = DEFAULT_SOURCE,
+    evidence_ref: str | None = None,
+) -> Iterator[Observation]:
+    """Yield BEHAVE-SHELL observations for a single session.
+
+    ``events`` is an iterable of ``(t, kind, data)`` tuples — see
+    ``_parse.AsciinemaEvent``. ``sid`` identifies the session for
+    evidence pointers and downstream joins.
+    """
+    ctx = build_session_context(
+        events, sid=sid, source=source, evidence_ref=evidence_ref
+    )
+    for feature_fn in FEATURES:
+        yield from feature_fn(ctx)
+
+
+def build_context(
+    events: Iterable[AsciinemaEvent],
+    *,
+    sid: str,
+    source: str = DEFAULT_SOURCE,
+    evidence_ref: str | None = None,
+) -> SessionContext:
+    """Expose the SessionContext build for tests + future debug tools."""
+    return build_session_context(
+        events, sid=sid, source=source, evidence_ref=evidence_ref
+    )
diff --git a/tests/profiler/behave_shell/__init__.py b/tests/profiler/behave_shell/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/profiler/behave_shell/test_extract_smoke.py b/tests/profiler/behave_shell/test_extract_smoke.py
new file mode 100644
index 00000000..59298a13
--- /dev/null
+++ b/tests/profiler/behave_shell/test_extract_smoke.py
@@ -0,0 +1,102 @@
+"""Step 0 smoke: prove the wiring before any logic.
+
+Before any feature function lands, verify:
+
+* the public ``extract_session`` import path resolves;
+* an empty event stream yields zero observations and a well-formed
+  zero-duration ``SessionContext``;
+* a single input event yields a context with ``t_start == t_end``
+  and ``duration_s == 0.0``;
+* a multi-event stream populates ``t_start`` / ``t_end`` /
+  ``duration_s`` correctly and routes events into the
+  ``input_events`` / ``output_events`` slots by kind;
+* ``FEATURES`` is empty at Step 0 — the empty contract is the gate
+  that the next step must intentionally break.
+"""
+from __future__ import annotations
+
+from decnet.profiler.behave_shell import (
+    DEFAULT_SOURCE,
+    build_context,
+    extract_session,
+)
+from decnet.profiler.behave_shell._features import FEATURES
+from decnet.profiler.behave_shell._parse import AsciinemaEvent
+
+
+def test_features_tuple_is_empty_at_step_0() -> None:
+    assert FEATURES == ()
+
+
+def test_default_source_is_canonical_path() -> None:
+    assert DEFAULT_SOURCE == "decnet/profiler/behave_shell/extract.py"
+
+
+def test_extract_session_empty_stream_yields_no_observations() -> None:
+    out = list(extract_session([], sid="sess-empty"))
+    assert out == []
+
+
+def test_build_context_empty_stream_zero_duration() -> None:
+    ctx = build_context([], sid="sess-empty")
+    assert ctx.sid == "sess-empty"
+    assert ctx.source == DEFAULT_SOURCE
+    assert ctx.evidence_ref == "session:sess-empty"
+    assert ctx.t_start == 0.0
+    assert ctx.t_end == 0.0
+    assert ctx.duration_s == 0.0
+    assert ctx.input_events == ()
+    assert ctx.output_events == ()
+
+
+def test_build_context_single_input_event() -> None:
+    events: list[AsciinemaEvent] = [(1.5, "i", "a")]
+    ctx = build_context(events, sid="sess-1")
+    assert ctx.t_start == 1.5
+    assert ctx.t_end == 1.5
+    assert ctx.duration_s == 0.0
+    assert ctx.input_events == ((1.5, "i", "a"),)
+    assert ctx.output_events == ()
+
+
+def test_build_context_multi_event_routes_by_kind() -> None:
+    events: list[AsciinemaEvent] = [
+        (0.0, "i", "l"),
+        (0.1, "i", "s"),
+        (0.2, "o", "ls\r\n"),
+        (0.3, "o", "file.txt\r\n"),
+        (0.5, "i", "\r"),
+    ]
+    ctx = build_context(events, sid="sess-multi")
+    assert ctx.t_start == 0.0
+    assert ctx.t_end == 0.5
+    assert ctx.duration_s == 0.5
+    assert len(ctx.input_events) == 3
+    assert len(ctx.output_events) == 2
+    # Order preserved
+    assert ctx.input_events[0] == (0.0, "i", "l")
+    assert ctx.output_events[-1] == (0.3, "o", "file.txt\r\n")
+
+
+def test_build_context_unsorted_stream_uses_min_and_max() -> None:
+    events: list[AsciinemaEvent] = [(0.4, "i", "b"), (0.1, "i", "a")]
+    ctx = build_context(events, sid="sess-unsorted")
+    assert ctx.t_start == 0.1
+    assert ctx.t_end == 0.4
+    assert ctx.input_events == ((0.4, "i", "b"), (0.1, "i", "a"))
+
+
+def test_build_context_explicit_evidence_ref_overrides_default() -> None:
+    ctx = build_context(
+        [(0.0, "i", "x")],
+        sid="sess-x",
+        evidence_ref="shard:/var/log/d/sess-x.cast",
+    )
+    assert ctx.evidence_ref == "shard:/var/log/d/sess-x.cast"
+
+
+def test_extract_session_with_features_still_empty() -> None:
+    """Until Step 2 lands, even a populated stream emits nothing."""
+    events: list[AsciinemaEvent] = [(t / 10.0, "i", c) for t, c in enumerate("hello\r")]
+    out = list(extract_session(events, sid="sess-features-empty"))
+    assert out == []