feat(profiler/behave_shell): scaffold extract_session entry point
BEHAVE-EXTRACTOR.md Phase A Step 0. Lays the package skeleton (__init__/extract/_parse/_ctx/_thresholds/_features) with empty FEATURES = (), so the worker plumbing in BEHAVE-INTEGRATION Phase 4 has a stable import path before any primitive lands. extract_session() builds a SessionContext once and fans the registered feature functions across it; at Step 0 that fan-out is empty and the function yields nothing. Step 1 (asciinema parser + paste-burst detector) and Step 2 (motor.input_modality) land next. Smoke suite asserts the empty contract: empty stream → no observations, single event → t_start == t_end, multi-event → events routed into input_events / output_events by kind, evidence_ref defaults to "session:<sid>" or honours an explicit override.
This commit is contained in:
18
decnet/profiler/behave_shell/__init__.py
Normal file
18
decnet/profiler/behave_shell/__init__.py
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
"""BEHAVE-SHELL extraction engine — DECNET's official implementation.
|
||||||
|
|
||||||
|
Per ``development/BEHAVE-EXTRACTOR.md``: this package is a pure
|
||||||
|
library. Workers (``BEHAVE-INTEGRATION.md`` Phase 4) own I/O, bus
|
||||||
|
emission, and persistence. The engine just turns one PTY session into
|
||||||
|
``Iterable[Observation]``.
|
||||||
|
|
||||||
|
BEHAVE is the spec; DECNET is the engine.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from decnet.profiler.behave_shell.extract import (
|
||||||
|
DEFAULT_SOURCE,
|
||||||
|
build_context,
|
||||||
|
extract_session,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Public surface of the package: the three names re-exported from
# ``extract``. The underscore-prefixed sibling modules are not listed.
__all__ = ["DEFAULT_SOURCE", "build_context", "extract_session"]
|
||||||
78
decnet/profiler/behave_shell/_ctx.py
Normal file
78
decnet/profiler/behave_shell/_ctx.py
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
"""SessionContext: precomputed bundle every feature function reads from.
|
||||||
|
|
||||||
|
A naïve engine re-walks the event stream once per primitive. We don't
|
||||||
|
do that — one walk over the events builds this context, every feature
|
||||||
|
reads from it. Adding a new feature is O(1) cost on the parse side.
|
||||||
|
|
||||||
|
Step 0 ships the structural fields (sid / source / evidence_ref /
|
||||||
|
timing envelope / ``input_events`` / ``output_events``). Step 1+ fills
|
||||||
|
``iats`` / ``paste_bursts`` / ``commands`` / ``inter_cmd_iats`` / ``output_per_cmd``.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Iterable
|
||||||
|
|
||||||
|
from decnet.profiler.behave_shell._parse import AsciinemaEvent
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True, slots=True)
|
||||||
|
class SessionContext:
|
||||||
|
sid: str
|
||||||
|
source: str
|
||||||
|
evidence_ref: str
|
||||||
|
t_start: float
|
||||||
|
t_end: float
|
||||||
|
duration_s: float
|
||||||
|
|
||||||
|
input_events: tuple[AsciinemaEvent, ...] = field(default_factory=tuple)
|
||||||
|
output_events: tuple[AsciinemaEvent, ...] = field(default_factory=tuple)
|
||||||
|
|
||||||
|
|
||||||
|
def build_session_context(
    events: Iterable[AsciinemaEvent],
    *,
    sid: str,
    source: str,
    evidence_ref: str | None = None,
) -> SessionContext:
    """Single-pass build of the SessionContext for ``events``.

    ``events`` is consumed exactly once, so a one-shot iterator is fine.
    Events of kind ``"i"`` are collected into ``input_events`` and kind
    ``"o"`` into ``output_events``, each keeping arrival order. Events of
    any other kind are not stored, but their timestamps still widen the
    timing envelope.

    ``t_start`` / ``t_end`` are the smallest / largest timestamp seen, so
    the envelope is correct even when the stream is out of order or its
    timestamps are not non-negative offsets. An empty stream produces
    ``t_start == t_end == duration_s == 0.0``.

    ``evidence_ref`` defaults to ``"session:" + sid`` so callers that
    don't yet plumb a real evidence pointer still get a stable,
    BEHAVE-envelope-valid string. Workers should pass an explicit
    pointer to the on-disk shard. An empty string is treated like
    ``None`` and falls back to the same default.
    """
    inputs: list[AsciinemaEvent] = []
    outputs: list[AsciinemaEvent] = []
    # Both bounds start unset and are seeded by the first event. Seeding
    # the upper bound with 0.0 would report a ``t_end`` that belongs to no
    # event whenever every timestamp is below zero.
    t_min: float | None = None
    t_max: float | None = None

    for ev in events:
        t, kind, _ = ev
        if t_min is None or t < t_min:
            t_min = t
        if t_max is None or t > t_max:
            t_max = t
        if kind == "i":
            inputs.append(ev)
        elif kind == "o":
            outputs.append(ev)

    if t_min is None or t_max is None:
        # No events at all: collapse the envelope to a zero-length span.
        t_start = t_end = 0.0
    else:
        t_start, t_end = t_min, t_max

    return SessionContext(
        sid=sid,
        source=source,
        evidence_ref=evidence_ref or f"session:{sid}",
        t_start=t_start,
        t_end=t_end,
        # Defensive clamp; with min/max bounds the difference is already >= 0.
        duration_s=max(0.0, t_end - t_start),
        input_events=tuple(inputs),
        output_events=tuple(outputs),
    )
|
||||||
20
decnet/profiler/behave_shell/_features/__init__.py
Normal file
20
decnet/profiler/behave_shell/_features/__init__.py
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
"""Registered feature functions.
|
||||||
|
|
||||||
|
Each entry takes a ``SessionContext`` and yields zero or more
|
||||||
|
``Observation`` instances. Adding a primitive = adding a function in a
|
||||||
|
sibling module and appending it to ``FEATURES``.
|
||||||
|
|
||||||
|
Step 0 ships an empty tuple — extract_session() is wired but emits
|
||||||
|
nothing until Step 2.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Callable, Iterable
|
||||||
|
|
||||||
|
from decnet_behave_core.spec.envelope import Observation
|
||||||
|
|
||||||
|
from decnet.profiler.behave_shell._ctx import SessionContext
|
||||||
|
|
||||||
|
# Signature every registered feature function must satisfy: it takes the
# session's SessionContext and returns an iterable of Observation objects
# (possibly empty).
FeatureFn = Callable[[SessionContext], Iterable[Observation]]
|
||||||
|
|
||||||
|
# Registry of feature functions. Intentionally empty at Step 0; a new
# primitive is registered by appending its function here.
FEATURES: tuple[FeatureFn, ...] = ()
|
||||||
14
decnet/profiler/behave_shell/_parse.py
Normal file
14
decnet/profiler/behave_shell/_parse.py
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
"""Asciinema event types.
|
||||||
|
|
||||||
|
The on-disk shard format is a list of 3-tuples ``(t, kind, data)`` where
|
||||||
|
``t`` is seconds since session start (float), ``kind`` is ``'i'`` (input)
|
||||||
|
or ``'o'`` (output), and ``data`` is the captured bytes decoded as a
|
||||||
|
Python ``str``. Step 0 ships only the type aliases — Step 1 fills the
|
||||||
|
parsing helpers and paste-burst detector.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Literal, Tuple
|
||||||
|
|
||||||
|
# Event direction marker: "i" is input, "o" is output.
EventKind = Literal["i", "o"]
|
||||||
|
# One recorded event as ``(t, kind, data)``: ``t`` is seconds since session
# start, ``kind`` is an EventKind, ``data`` is the captured bytes decoded
# to ``str``.
AsciinemaEvent = Tuple[float, EventKind, str]
|
||||||
11
decnet/profiler/behave_shell/_thresholds.py
Normal file
11
decnet/profiler/behave_shell/_thresholds.py
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
"""Numeric thresholds for BEHAVE-SHELL primitive classification.
|
||||||
|
|
||||||
|
Each constant added here cites its calibration source. When the
|
||||||
|
registry's ``notes:`` field disagrees with a constant in this file the
|
||||||
|
registry is authoritative — fix the constant and re-run the
|
||||||
|
calibration grid.
|
||||||
|
|
||||||
|
Step 0 ships this file empty by design; thresholds land alongside the
|
||||||
|
feature functions that consume them (Steps 1+).
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
51
decnet/profiler/behave_shell/extract.py
Normal file
51
decnet/profiler/behave_shell/extract.py
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
"""Public extraction entry point.
|
||||||
|
|
||||||
|
``extract_session`` is the only function workers call. It builds a
|
||||||
|
:class:`SessionContext` once and fans the registered feature functions
|
||||||
|
across it. Pure library: no I/O, no bus, no DB. The worker
|
||||||
|
(``BEHAVE-INTEGRATION.md`` Phase 4) is responsible for those.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Iterable, Iterator
|
||||||
|
|
||||||
|
from decnet_behave_core.spec.envelope import Observation
|
||||||
|
|
||||||
|
from decnet.profiler.behave_shell._ctx import SessionContext, build_session_context
|
||||||
|
from decnet.profiler.behave_shell._features import FEATURES
|
||||||
|
from decnet.profiler.behave_shell._parse import AsciinemaEvent
|
||||||
|
|
||||||
|
# Value used for the ``source`` argument when a caller does not supply
# one; it is this module's repository-relative path.
DEFAULT_SOURCE = "decnet/profiler/behave_shell/extract.py"
|
||||||
|
|
||||||
|
|
||||||
|
def extract_session(
    events: Iterable[AsciinemaEvent],
    *,
    sid: str,
    source: str = DEFAULT_SOURCE,
    evidence_ref: str | None = None,
) -> Iterator[Observation]:
    """Stream the BEHAVE-SHELL observations produced for one session.

    The session context is built once from ``events`` (an iterable of
    ``(t, kind, data)`` tuples, see ``_parse.AsciinemaEvent``) and then
    handed to each function in ``FEATURES`` in registration order;
    whatever those functions produce is yielded through unchanged.

    This is a generator function: nothing runs, and ``events`` is not
    consumed, until the caller starts iterating the result.

    ``sid`` names the session for evidence pointers and downstream joins;
    ``source`` and ``evidence_ref`` are passed straight through to the
    context builder.
    """
    session_ctx = build_session_context(
        events,
        sid=sid,
        source=source,
        evidence_ref=evidence_ref,
    )
    for run_feature in FEATURES:
        yield from run_feature(session_ctx)
|
||||||
|
|
||||||
|
|
||||||
|
def build_context(
    events: Iterable[AsciinemaEvent],
    *,
    sid: str,
    source: str = DEFAULT_SOURCE,
    evidence_ref: str | None = None,
) -> SessionContext:
    """Build and return the SessionContext without running any feature.

    Public wrapper over ``build_session_context`` for tests and future
    debug tools. It differs from the wrapped function only in giving
    ``source`` a default of ``DEFAULT_SOURCE``.
    """
    session_ctx = build_session_context(
        events,
        sid=sid,
        source=source,
        evidence_ref=evidence_ref,
    )
    return session_ctx
|
||||||
0
tests/profiler/behave_shell/__init__.py
Normal file
0
tests/profiler/behave_shell/__init__.py
Normal file
94
tests/profiler/behave_shell/test_extract_smoke.py
Normal file
94
tests/profiler/behave_shell/test_extract_smoke.py
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
"""Step 0 smoke: prove the wiring before any logic.
|
||||||
|
|
||||||
|
Before any feature function lands, verify:
|
||||||
|
|
||||||
|
* the public ``extract_session`` import path resolves;
|
||||||
|
* an empty event stream yields zero observations and a well-formed
|
||||||
|
zero-duration ``SessionContext``;
|
||||||
|
* a single input event yields a context with ``t_start == t_end``
|
||||||
|
and ``duration_s == 0.0``;
|
||||||
|
* a multi-event stream populates ``t_start`` / ``t_end`` /
|
||||||
|
``duration_s`` correctly and routes events into the
|
||||||
|
``input_events`` / ``output_events`` slots by kind;
|
||||||
|
* ``FEATURES`` is empty at Step 0 — the empty contract is the gate
|
||||||
|
that the next step must intentionally break.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from decnet.profiler.behave_shell import (
|
||||||
|
DEFAULT_SOURCE,
|
||||||
|
build_context,
|
||||||
|
extract_session,
|
||||||
|
)
|
||||||
|
from decnet.profiler.behave_shell._features import FEATURES
|
||||||
|
from decnet.profiler.behave_shell._parse import AsciinemaEvent
|
||||||
|
|
||||||
|
|
||||||
|
def test_features_tuple_is_empty_at_step_0() -> None:
    """The feature registry must be exactly the empty tuple at Step 0."""
    nothing_registered: tuple = ()
    assert FEATURES == nothing_registered
|
||||||
|
|
||||||
|
|
||||||
|
def test_default_source_is_canonical_path() -> None:
    """``DEFAULT_SOURCE`` is pinned to the extractor module's path string."""
    canonical = "decnet/profiler/behave_shell/extract.py"
    assert DEFAULT_SOURCE == canonical
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_session_empty_stream_yields_no_observations() -> None:
    """Draining the extractor over an empty stream produces nothing."""
    observations = extract_session([], sid="sess-empty")
    assert list(observations) == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_context_empty_stream_zero_duration() -> None:
    """An empty stream still gives a well-formed, zero-duration context."""
    ctx = build_context([], sid="sess-empty")

    # Identity fields: sid echoed back, defaults applied for the rest.
    assert (ctx.sid, ctx.source) == ("sess-empty", DEFAULT_SOURCE)
    assert ctx.evidence_ref == "session:sess-empty"

    # Timing envelope collapses to zero.
    assert (ctx.t_start, ctx.t_end, ctx.duration_s) == (0.0, 0.0, 0.0)

    # Neither event slot holds anything.
    assert ctx.input_events == ()
    assert ctx.output_events == ()
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_context_single_input_event() -> None:
    """One input event: start and end coincide and only the input slot fills."""
    only_event: AsciinemaEvent = (1.5, "i", "a")
    ctx = build_context([only_event], sid="sess-1")

    assert (ctx.t_start, ctx.t_end) == (1.5, 1.5)
    assert ctx.duration_s == 0.0
    assert ctx.input_events == (only_event,)
    assert ctx.output_events == ()
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_context_multi_event_routes_by_kind() -> None:
    """A mixed stream sets the envelope and splits events by kind, in order."""
    stream: list[AsciinemaEvent] = [
        (0.0, "i", "l"),
        (0.1, "i", "s"),
        (0.2, "o", "ls\r\n"),
        (0.3, "o", "file.txt\r\n"),
        (0.5, "i", "\r"),
    ]
    ctx = build_context(stream, sid="sess-multi")

    assert (ctx.t_start, ctx.t_end, ctx.duration_s) == (0.0, 0.5, 0.5)
    assert (len(ctx.input_events), len(ctx.output_events)) == (3, 2)

    # Arrival order survives the split.
    first_input = ctx.input_events[0]
    last_output = ctx.output_events[-1]
    assert first_input == (0.0, "i", "l")
    assert last_output == (0.3, "o", "file.txt\r\n")
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_session_explicit_evidence_ref_overrides_default() -> None:
    """A caller-supplied ``evidence_ref`` replaces the ``session:<sid>`` default.

    Exercised through ``build_context``, which exposes the resulting
    ``evidence_ref`` directly.
    """
    shard_pointer = "shard:/var/log/d/sess-x.cast"
    ctx = build_context([(0.0, "i", "x")], sid="sess-x", evidence_ref=shard_pointer)
    assert ctx.evidence_ref == shard_pointer
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_session_with_features_still_empty() -> None:
    """Until Step 2 lands, even a populated stream emits nothing."""
    # One input event per character of the typed line, spaced 0.1 s apart.
    keystrokes: list[AsciinemaEvent] = [
        (idx / 10.0, "i", ch) for idx, ch in enumerate("hello\r")
    ]
    observations = extract_session(keystrokes, sid="sess-features-empty")
    assert list(observations) == []
|
||||||
Reference in New Issue
Block a user