feat(profiler/behave_shell): scaffold extract_session entry point
BEHAVE-EXTRACTOR.md Phase A Step 0. Lays the package skeleton (__init__/extract/_parse/_ctx/_thresholds/_features) with empty FEATURES = (), so the worker plumbing in BEHAVE-INTEGRATION Phase 4 has a stable import path before any primitive lands. extract_session() builds a SessionContext once and fans the registered feature functions across it; at Step 0 that fan-out is empty and the function yields nothing. Step 1 (asciinema parser + paste-burst detector) and Step 2 (motor.input_modality) land next. Smoke suite asserts the empty contract: empty stream → no observations, single event → t_start == t_end, multi-event → events routed into input_events / output_events by kind, evidence_ref defaults to "session:<sid>" or honours an explicit override.
This commit is contained in:
18
decnet/profiler/behave_shell/__init__.py
Normal file
18
decnet/profiler/behave_shell/__init__.py
Normal file
@@ -0,0 +1,18 @@
|
||||
"""BEHAVE-SHELL extraction engine — DECNET's official implementation.
|
||||
|
||||
Per ``development/BEHAVE-EXTRACTOR.md``: this package is a pure
|
||||
library. Workers (``BEHAVE-INTEGRATION.md`` Phase 4) own I/O, bus
|
||||
emission, and persistence. The engine just turns one PTY session into
|
||||
``Iterable[Observation]``.
|
||||
|
||||
BEHAVE is the spec; DECNET is the engine.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.profiler.behave_shell.extract import (
|
||||
DEFAULT_SOURCE,
|
||||
build_context,
|
||||
extract_session,
|
||||
)
|
||||
|
||||
__all__ = ["DEFAULT_SOURCE", "build_context", "extract_session"]
|
||||
78
decnet/profiler/behave_shell/_ctx.py
Normal file
78
decnet/profiler/behave_shell/_ctx.py
Normal file
@@ -0,0 +1,78 @@
|
||||
"""SessionContext: precomputed bundle every feature function reads from.
|
||||
|
||||
A naïve engine re-walks the event stream once per primitive. We don't
|
||||
do that — one walk over the events builds this context, every feature
|
||||
reads from it. Adding a new feature is O(1) cost on the parse side.
|
||||
|
||||
Step 0 ships only the structural fields (sid / source / evidence_ref /
|
||||
timing envelope). Step 1+ fills ``iats`` / ``paste_bursts`` /
|
||||
``commands`` / ``inter_cmd_iats`` / ``output_per_cmd``.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Iterable
|
||||
|
||||
from decnet.profiler.behave_shell._parse import AsciinemaEvent
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class SessionContext:
|
||||
sid: str
|
||||
source: str
|
||||
evidence_ref: str
|
||||
t_start: float
|
||||
t_end: float
|
||||
duration_s: float
|
||||
|
||||
input_events: tuple[AsciinemaEvent, ...] = field(default_factory=tuple)
|
||||
output_events: tuple[AsciinemaEvent, ...] = field(default_factory=tuple)
|
||||
|
||||
|
||||
def build_session_context(
    events: Iterable[AsciinemaEvent],
    *,
    sid: str,
    source: str,
    evidence_ref: str | None = None,
) -> SessionContext:
    """Build the SessionContext for ``events`` in a single pass.

    Args:
        events: Iterable of ``(t, kind, data)`` 3-tuples. It is consumed
            exactly once, so a one-shot iterator is acceptable.
        sid: Session identifier, stored verbatim on the context.
        source: Producer label, stored verbatim on the context.
        evidence_ref: Pointer to the evidence for this session. When it
            is ``None`` or empty, ``"session:" + sid`` is used so callers
            that don't yet plumb a real evidence pointer still get a
            stable string. Workers should pass an explicit pointer to
            the on-disk shard.

    Returns:
        A frozen ``SessionContext``. ``t_start`` / ``t_end`` are the
        smallest and largest timestamps seen, so the envelope always
        contains every stored event even if the stream is not sorted by
        time. An empty stream yields ``t_start == t_end == 0.0``.

    Raises:
        ValueError: If an event does not unpack into exactly three items.
    """
    inputs: list[AsciinemaEvent] = []
    outputs: list[AsciinemaEvent] = []
    # Both bounds start unset and are seeded from the data. Seeding the
    # upper bound with 0.0 would pin t_end to 0.0 whenever every
    # timestamp is below zero.
    t_min: float | None = None
    t_max: float | None = None

    for ev in events:
        t, kind, _ = ev
        if t_min is None or t < t_min:
            t_min = t
        if t_max is None or t > t_max:
            t_max = t
        # Events of any other kind are not stored, but they still widen
        # the timing envelope above.
        if kind == "i":
            inputs.append(ev)
        elif kind == "o":
            outputs.append(ev)

    # Empty stream: fall back to a well-formed zero-length envelope.
    t_start = 0.0 if t_min is None else t_min
    t_end = 0.0 if t_max is None else t_max

    return SessionContext(
        sid=sid,
        source=source,
        evidence_ref=evidence_ref or f"session:{sid}",
        t_start=t_start,
        t_end=t_end,
        duration_s=max(0.0, t_end - t_start),
        input_events=tuple(inputs),
        output_events=tuple(outputs),
    )
|
||||
20
decnet/profiler/behave_shell/_features/__init__.py
Normal file
20
decnet/profiler/behave_shell/_features/__init__.py
Normal file
@@ -0,0 +1,20 @@
|
||||
"""Registered feature functions.
|
||||
|
||||
Each entry takes a ``SessionContext`` and yields zero or more
|
||||
``Observation`` instances. Adding a primitive = adding a function in a
|
||||
sibling module and appending it to ``FEATURES``.
|
||||
|
||||
Step 0 ships an empty tuple — extract_session() is wired but emits
|
||||
nothing until Step 2.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Callable, Iterable
|
||||
|
||||
from decnet_behave_core.spec.envelope import Observation
|
||||
|
||||
from decnet.profiler.behave_shell._ctx import SessionContext
|
||||
|
||||
# Shape of a feature function: it receives the shared SessionContext and
# returns an iterable of Observation instances (possibly empty).
FeatureFn = Callable[[SessionContext], Iterable[Observation]]

# Registry of feature functions. It is a tuple, so registering a new
# primitive means rebinding this name. Intentionally empty for now.
FEATURES: tuple[FeatureFn, ...] = ()
|
||||
14
decnet/profiler/behave_shell/_parse.py
Normal file
14
decnet/profiler/behave_shell/_parse.py
Normal file
@@ -0,0 +1,14 @@
|
||||
"""Asciinema event types.
|
||||
|
||||
The on-disk shard format is a list of 3-tuples ``(t, kind, data)`` where
|
||||
``t`` is seconds since session start (float), ``kind`` is ``'i'`` (input)
|
||||
or ``'o'`` (output), and ``data`` is the captured bytes decoded as a
|
||||
Python ``str``. Step 0 ships only the type aliases — Step 1 fills the
|
||||
parsing helpers and paste-burst detector.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Literal, Tuple
|
||||
|
||||
# Direction marker of an event: "i" for input, "o" for output.
EventKind = Literal["i", "o"]

# One recorded event as a ``(t, kind, data)`` 3-tuple.
AsciinemaEvent = Tuple[float, EventKind, str]
|
||||
11
decnet/profiler/behave_shell/_thresholds.py
Normal file
11
decnet/profiler/behave_shell/_thresholds.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""Numeric thresholds for BEHAVE-SHELL primitive classification.
|
||||
|
||||
Each constant added here cites its calibration source. When the
|
||||
registry's ``notes:`` field disagrees with a constant in this file the
|
||||
registry is authoritative — fix the constant and re-run the
|
||||
calibration grid.
|
||||
|
||||
Step 0 ships this file empty by design; thresholds land alongside the
|
||||
feature functions that consume them (Steps 1+).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
51
decnet/profiler/behave_shell/extract.py
Normal file
51
decnet/profiler/behave_shell/extract.py
Normal file
@@ -0,0 +1,51 @@
|
||||
"""Public extraction entry point.
|
||||
|
||||
``extract_session`` is the only function workers call. It builds a
|
||||
:class:`SessionContext` once and fans the registered feature functions
|
||||
across it. Pure library: no I/O, no bus, no DB. The worker
|
||||
(``BEHAVE-INTEGRATION.md`` Phase 4) is responsible for those.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Iterable, Iterator
|
||||
|
||||
from decnet_behave_core.spec.envelope import Observation
|
||||
|
||||
from decnet.profiler.behave_shell._ctx import SessionContext, build_session_context
|
||||
from decnet.profiler.behave_shell._features import FEATURES
|
||||
from decnet.profiler.behave_shell._parse import AsciinemaEvent
|
||||
|
||||
# Value used for the ``source`` argument when the caller does not pass one.
DEFAULT_SOURCE = "decnet/profiler/behave_shell/extract.py"
|
||||
|
||||
|
||||
def extract_session(
    events: Iterable[AsciinemaEvent],
    *,
    sid: str,
    source: str = DEFAULT_SOURCE,
    evidence_ref: str | None = None,
) -> Iterator[Observation]:
    """Generate the observations produced by every registered feature.

    Args:
        events: Iterable of ``(t, kind, data)`` tuples (see
            ``_parse.AsciinemaEvent``).
        sid: Identifier of the session the events belong to.
        source: Producer label forwarded to the context builder.
        evidence_ref: Optional evidence pointer forwarded to the context
            builder, which applies its own default when this is ``None``.

    Yields:
        Whatever each function in ``FEATURES`` yields, in registry order.
    """
    # This is a generator function: nothing below runs, and ``events`` is
    # not consumed, until the caller starts iterating the result.
    session_ctx = build_session_context(
        events,
        sid=sid,
        source=source,
        evidence_ref=evidence_ref,
    )
    for feature in FEATURES:
        yield from feature(session_ctx)
|
||||
|
||||
|
||||
def build_context(
    events: Iterable[AsciinemaEvent],
    *,
    sid: str,
    source: str = DEFAULT_SOURCE,
    evidence_ref: str | None = None,
) -> SessionContext:
    """Return the SessionContext for ``events`` without running features.

    Public wrapper around ``build_session_context`` for tests and future
    debug tools. It differs only in defaulting ``source`` to
    ``DEFAULT_SOURCE``; all arguments are forwarded unchanged.
    """
    session_ctx = build_session_context(
        events,
        sid=sid,
        source=source,
        evidence_ref=evidence_ref,
    )
    return session_ctx
|
||||
0
tests/profiler/behave_shell/__init__.py
Normal file
0
tests/profiler/behave_shell/__init__.py
Normal file
94
tests/profiler/behave_shell/test_extract_smoke.py
Normal file
94
tests/profiler/behave_shell/test_extract_smoke.py
Normal file
@@ -0,0 +1,94 @@
|
||||
"""Step 0 smoke: prove the wiring before any logic.
|
||||
|
||||
Before any feature function lands, verify:
|
||||
|
||||
* the public ``extract_session`` import path resolves;
|
||||
* an empty event stream yields zero observations and a well-formed
|
||||
zero-duration ``SessionContext``;
|
||||
* a single input event yields a context with ``t_start == t_end``
|
||||
and ``duration_s == 0.0``;
|
||||
* a multi-event stream populates ``t_start`` / ``t_end`` /
|
||||
``duration_s`` correctly and routes events into the
|
||||
``input_events`` / ``output_events`` slots by kind;
|
||||
* ``FEATURES`` is empty at Step 0 — the empty contract is the gate
|
||||
that the next step must intentionally break.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.profiler.behave_shell import (
|
||||
DEFAULT_SOURCE,
|
||||
build_context,
|
||||
extract_session,
|
||||
)
|
||||
from decnet.profiler.behave_shell._features import FEATURES
|
||||
from decnet.profiler.behave_shell._parse import AsciinemaEvent
|
||||
|
||||
|
||||
def test_features_tuple_is_empty_at_step_0() -> None:
    """The feature registry must be exactly the empty tuple at Step 0."""
    assert FEATURES == ()
|
||||
|
||||
|
||||
def test_default_source_is_canonical_path() -> None:
    """Pin the literal value of ``DEFAULT_SOURCE``."""
    expected = "decnet/profiler/behave_shell/extract.py"
    assert DEFAULT_SOURCE == expected
|
||||
|
||||
|
||||
def test_extract_session_empty_stream_yields_no_observations() -> None:
    """An empty event stream must produce no observations at all."""
    # list() drains the generator so the extraction actually runs.
    observations = list(extract_session([], sid="sess-empty"))
    assert observations == []
|
||||
|
||||
|
||||
def test_build_context_empty_stream_zero_duration() -> None:
    """An empty stream gives default identity fields and a zero envelope."""
    ctx = build_context([], sid="sess-empty")

    # Identity fields: sid echoed back, defaults for source/evidence_ref.
    assert (ctx.sid, ctx.source, ctx.evidence_ref) == (
        "sess-empty",
        DEFAULT_SOURCE,
        "session:sess-empty",
    )
    # Timing envelope collapses to zero.
    assert (ctx.t_start, ctx.t_end, ctx.duration_s) == (0.0, 0.0, 0.0)
    # No events were routed anywhere.
    assert ctx.input_events == ()
    assert ctx.output_events == ()
|
||||
|
||||
|
||||
def test_build_context_single_input_event() -> None:
    """One input event: start equals end, and only the input slot fills."""
    only_event: AsciinemaEvent = (1.5, "i", "a")
    ctx = build_context([only_event], sid="sess-1")

    assert ctx.t_start == 1.5
    assert ctx.t_end == 1.5
    assert ctx.duration_s == 0.0
    assert ctx.input_events == (only_event,)
    assert ctx.output_events == ()
|
||||
|
||||
|
||||
def test_build_context_multi_event_routes_by_kind() -> None:
    """A mixed stream is split by kind and its envelope spans all events."""
    stream: list[AsciinemaEvent] = [
        (0.0, "i", "l"),
        (0.1, "i", "s"),
        (0.2, "o", "ls\r\n"),
        (0.3, "o", "file.txt\r\n"),
        (0.5, "i", "\r"),
    ]
    ctx = build_context(stream, sid="sess-multi")

    # Envelope runs from the first to the last timestamp.
    assert (ctx.t_start, ctx.t_end, ctx.duration_s) == (0.0, 0.5, 0.5)

    # Three inputs and two outputs, each in its own slot.
    assert len(ctx.input_events) == 3
    assert len(ctx.output_events) == 2

    # Order within each slot follows stream order.
    first_input = ctx.input_events[0]
    last_output = ctx.output_events[-1]
    assert first_input == (0.0, "i", "l")
    assert last_output == (0.3, "o", "file.txt\r\n")
|
||||
|
||||
|
||||
def test_extract_session_explicit_evidence_ref_overrides_default() -> None:
    """An explicit ``evidence_ref`` replaces the ``session:<sid>`` default.

    Despite the name, this goes through ``build_context`` because the
    context is where ``evidence_ref`` can be observed.
    """
    shard_pointer = "shard:/var/log/d/sess-x.cast"
    ctx = build_context(
        [(0.0, "i", "x")],
        sid="sess-x",
        evidence_ref=shard_pointer,
    )
    assert ctx.evidence_ref == shard_pointer
|
||||
|
||||
|
||||
def test_extract_session_with_features_still_empty() -> None:
    """Until Step 2 lands, even a populated stream emits nothing."""
    # One input event per character, spaced 0.1 apart starting at 0.0.
    keystrokes: list[AsciinemaEvent] = [
        (index / 10.0, "i", char) for index, char in enumerate("hello\r")
    ]
    observations = list(extract_session(keystrokes, sid="sess-features-empty"))
    assert observations == []
|
||||
Reference in New Issue
Block a user