feat(profiler/behave_shell): emit motor.input_modality
BEHAVE-EXTRACTOR.md Phase A Step 2. This is the first primitive; it was
picked first because it has the highest discriminative value (HUMAN vs
everyone else) and the simplest implementation (paste-event ratio over
total inputs).
* _features/motor.py:input_modality(ctx) emits at most one Observation
per session, valued in {typed, pasted, mixed}, with confidence 0.75
(typed / pasted) or 0.70 (mixed).
* _features/_emit.py centralises the make_observation helper so
every feature module gets the same Window/source/evidence_ref
boilerplate without copy-paste.
* Thresholds inherited from the prototype's calibration history
(MODALITY_PASTED_MIN=0.40, MODALITY_TYPED_MAX=0.05).
* Zero-input session skips emission — registry doesn't admit
"unknown" here.
Tests: pure-typed → typed, pure-pasted → pasted, mixed → mixed,
output-only session → no observation, full envelope round-trip.
This commit is contained in:
@@ -3,9 +3,6 @@
|
||||
Each entry takes a ``SessionContext`` and yields zero or more
|
||||
``Observation`` instances. Adding a primitive = adding a function in a
|
||||
sibling module and appending it to ``FEATURES``.
|
||||
|
||||
Step 0 ships an empty tuple — extract_session() is wired but emits
|
||||
nothing until Step 2.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -14,7 +11,10 @@ from typing import Callable, Iterable
|
||||
from decnet_behave_core.spec.envelope import Observation
|
||||
|
||||
from decnet.profiler.behave_shell._ctx import SessionContext
|
||||
from decnet.profiler.behave_shell._features.motor import input_modality
|
||||
|
||||
FeatureFn = Callable[[SessionContext], Iterable[Observation]]
|
||||
|
||||
FEATURES: tuple[FeatureFn, ...] = ()
|
||||
FEATURES: tuple[FeatureFn, ...] = (
|
||||
input_modality,
|
||||
)
|
||||
|
||||
32
decnet/profiler/behave_shell/_features/_emit.py
Normal file
32
decnet/profiler/behave_shell/_features/_emit.py
Normal file
@@ -0,0 +1,32 @@
|
||||
"""Helper for building registry-valid :class:`Observation` records.
|
||||
|
||||
Every feature module would otherwise repeat the same Window /
|
||||
source / evidence_ref boilerplate. This helper centralises it and is
|
||||
the one place to reach when emission semantics change (e.g. when we
|
||||
start parametrising windows on a per-primitive basis).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from decnet_behave_core.spec.envelope import Observation, Window
|
||||
|
||||
from decnet.profiler.behave_shell._ctx import SessionContext
|
||||
|
||||
|
||||
def make_observation(
    ctx: SessionContext,
    *,
    primitive: str,
    value: Any,
    confidence: float,
) -> Observation:
    """Wrap a feature result in an :class:`Observation` spanning the session.

    The window runs from ``ctx.t_start`` to ``ctx.t_end``; ``source`` and
    ``evidence_ref`` are copied from *ctx* unchanged. ``primitive``,
    ``value`` and ``confidence`` are forwarded as given.
    """
    # Whole-session window: every caller currently gets the same span.
    session_window = Window(start_ts=ctx.t_start, end_ts=ctx.t_end)
    envelope_fields = {
        "primitive": primitive,
        "value": value,
        "confidence": confidence,
        "window": session_window,
        "source": ctx.source,
        "evidence_ref": ctx.evidence_ref,
    }
    return Observation(**envelope_fields)
|
||||
45
decnet/profiler/behave_shell/_features/motor.py
Normal file
45
decnet/profiler/behave_shell/_features/motor.py
Normal file
@@ -0,0 +1,45 @@
|
||||
"""``motor.*`` feature functions.
|
||||
|
||||
Step 2: ``motor.input_modality`` — typed / pasted / mixed.
|
||||
Step 3: ``motor.paste_burst_rate`` — none / occasional / habitual.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Iterator
|
||||
|
||||
from decnet_behave_core.spec.envelope import Observation
|
||||
|
||||
from decnet.profiler.behave_shell._ctx import SessionContext
|
||||
from decnet.profiler.behave_shell._features._emit import make_observation
|
||||
from decnet.profiler.behave_shell._thresholds import (
|
||||
MODALITY_PASTED_MIN,
|
||||
MODALITY_TYPED_MAX,
|
||||
)
|
||||
|
||||
|
||||
def input_modality(ctx: SessionContext) -> Iterator[Observation]:
    """Yield at most one ``motor.input_modality`` observation for *ctx*.

    The value is derived from ``ctx.paste_event_count`` divided by the
    number of entries in ``ctx.input_events``:

    * ratio >= ``MODALITY_PASTED_MIN`` → ``"pasted"`` (confidence 0.75)
    * ratio <= ``MODALITY_TYPED_MAX``  → ``"typed"``  (confidence 0.75)
    * anything in between             → ``"mixed"``  (confidence 0.70)

    A session with no input events yields nothing: there is no honest
    value to report, and the registry has no ``unknown`` for this
    primitive.
    """
    total_inputs = len(ctx.input_events)
    if not total_inputs:
        # Nothing was entered, so there is no ratio to classify.
        return

    paste_share = ctx.paste_event_count / total_inputs

    # The pasted check runs first, matching the threshold precedence.
    if paste_share >= MODALITY_PASTED_MIN:
        label, certainty = "pasted", 0.75
    elif paste_share <= MODALITY_TYPED_MAX:
        label, certainty = "typed", 0.75
    else:
        label, certainty = "mixed", 0.70

    yield make_observation(
        ctx,
        primitive="motor.input_modality",
        value=label,
        confidence=certainty,
    )
|
||||
@@ -24,8 +24,11 @@ from decnet.profiler.behave_shell._features import FEATURES
|
||||
from decnet.profiler.behave_shell._parse import AsciinemaEvent
|
||||
|
||||
|
||||
def test_features_tuple_is_empty_at_step_0() -> None:
|
||||
assert FEATURES == ()
|
||||
def test_features_tuple_is_populated() -> None:
|
||||
# Step 2+: at least one feature is registered. Exact membership is
|
||||
# asserted in per-feature tests; this test only pins "the registry
|
||||
# is non-empty" so the empty-FEATURES regression doesn't sneak back.
|
||||
assert len(FEATURES) >= 1
|
||||
|
||||
|
||||
def test_default_source_is_canonical_path() -> None:
|
||||
@@ -87,8 +90,7 @@ def test_extract_session_explicit_evidence_ref_overrides_default() -> None:
|
||||
assert ctx.evidence_ref == "shard:/var/log/d/sess-x.cast"
|
||||
|
||||
|
||||
def test_extract_session_with_features_still_empty() -> None:
|
||||
"""Until Step 2 lands, even a populated stream emits nothing."""
|
||||
events: list[AsciinemaEvent] = [(t / 10.0, "i", c) for t, c in enumerate("hello\r")]
|
||||
out = list(extract_session(events, sid="sess-features-empty"))
|
||||
def test_extract_session_zero_inputs_yields_nothing() -> None:
|
||||
"""No input events → no feature emits (input_modality skips on empty)."""
|
||||
out = list(extract_session([(0.0, "o", "hi\r\n")], sid="sess-no-input"))
|
||||
assert out == []
|
||||
|
||||
64
tests/profiler/behave_shell/test_motor_input_modality.py
Normal file
64
tests/profiler/behave_shell/test_motor_input_modality.py
Normal file
@@ -0,0 +1,64 @@
|
||||
"""Step 2: ``motor.input_modality`` — typed / pasted / mixed."""
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.profiler.behave_shell import extract_session
|
||||
from decnet.profiler.behave_shell._parse import AsciinemaEvent
|
||||
|
||||
|
||||
def _by_primitive(observations: list, primitive: str):
|
||||
return [o for o in observations if o.primitive == primitive]
|
||||
|
||||
|
||||
def test_pure_typed_session_emits_typed() -> None:
    """Single-character input events only → one ``typed`` observation at 0.75."""
    keystrokes: list[AsciinemaEvent] = [
        (idx * 0.1, "i", char) for idx, char in enumerate("ls\r")
    ]
    emitted = list(extract_session(keystrokes, sid="sess-typed"))
    modality = _by_primitive(emitted, "motor.input_modality")
    assert len(modality) == 1
    only = modality[0]
    assert only.value == "typed"
    assert only.confidence == 0.75
|
||||
|
||||
|
||||
def test_pure_pasted_session_emits_pasted() -> None:
    """Three multi-character input events and no keystrokes → ``pasted``."""
    pasted_lines = (
        "echo first paste\r",
        "echo second paste\r",
        "echo third paste\r",
    )
    # One whole command per event, one second apart (0.0, 1.0, 2.0).
    events: list[AsciinemaEvent] = [
        (float(second), "i", text) for second, text in enumerate(pasted_lines)
    ]
    emitted = list(extract_session(events, sid="sess-pasted"))
    modality = _by_primitive(emitted, "motor.input_modality")
    assert len(modality) == 1
    assert modality[0].value == "pasted"
|
||||
|
||||
|
||||
def test_mixed_session_emits_mixed() -> None:
    """One multi-character event among single-character ones → ``mixed``."""
    # 1 multi-character event + 10 single-character events
    # ("ls -la\rps\r" is 10 characters) = 11 inputs. Assuming only the
    # multi-character event is counted as a paste (TODO confirm against
    # the paste detector), the ratio is 1/11 ≈ 0.09, which lies between
    # the typed (≤0.05) and pasted (≥0.40) thresholds → mixed.
    events: list[AsciinemaEvent] = [(0.0, "i", "echo hello\r")]
    events += [(0.5 + i * 0.1, "i", c) for i, c in enumerate("ls -la\rps\r")]
    out = list(extract_session(events, sid="sess-mixed"))
    obs = _by_primitive(out, "motor.input_modality")
    assert len(obs) == 1
    assert obs[0].value == "mixed"
|
||||
|
||||
|
||||
def test_zero_input_session_emits_nothing_for_modality() -> None:
    """An output-only recording produces no ``motor.input_modality``."""
    # With no input events there is no honest value, so nothing is emitted.
    output_only: list[AsciinemaEvent] = [(0.0, "o", "welcome\r\n")]
    emitted = list(extract_session(output_only, sid="sess-empty-input"))
    modality = _by_primitive(emitted, "motor.input_modality")
    assert modality == []
|
||||
|
||||
|
||||
def test_observation_envelope_fields_are_populated() -> None:
    """The emitted observation carries source, evidence_ref, window, id and v."""
    single_event: list[AsciinemaEvent] = [(0.0, "i", "echo paste paste\r")]
    emitted = list(
        extract_session(
            single_event,
            sid="sess-env",
            evidence_ref="shard:/blob/sess-env",
        )
    )
    record = _by_primitive(emitted, "motor.input_modality")[0]

    assert record.source == "decnet/profiler/behave_shell/extract.py"
    assert record.evidence_ref == "shard:/blob/sess-env"

    # The only event is at t=0.0, so the window starts and ends there.
    session_window = record.window
    assert session_window.start_ts == 0.0
    assert session_window.end_ts == 0.0

    # envelope auto-populates id / ts / v
    assert record.id and len(record.id) > 0
    assert record.v == 1
|
||||
Reference in New Issue
Block a user