From 879f5e731bb0b5488c12067497f0245869503291 Mon Sep 17 00:00:00 2001 From: anti Date: Sun, 3 May 2026 07:47:38 -0400 Subject: [PATCH] feat(profiler/behave_shell): emit motor.input_modality MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BEHAVE-EXTRACTOR.md Phase A Step 2. The first primitive — picked first because it has the highest discriminative value (HUMAN vs everyone) and the simplest implementation (paste-event ratio over total inputs). * _features/motor.py:input_modality(ctx) emits one Observation per session in {typed, pasted, mixed} with confidence 0.75 / 0.70. * _features/_emit.py centralises the make_observation helper so every feature module gets the same Window/source/evidence_ref boilerplate without copy-paste. * Thresholds inherited from the prototype's calibration history (MODALITY_PASTED_MIN=0.40, MODALITY_TYPED_MAX=0.05). * Zero-input session skips emission — registry doesn't admit "unknown" here. Tests: pure-typed → typed, pure-pasted → pasted, mixed → mixed, output-only session → no observation, full envelope round-trip. 
--- .../behave_shell/_features/__init__.py | 8 +-- .../profiler/behave_shell/_features/_emit.py | 32 ++++++++++ .../profiler/behave_shell/_features/motor.py | 45 +++++++++++++ .../behave_shell/test_extract_smoke.py | 14 ++-- .../behave_shell/test_motor_input_modality.py | 64 +++++++++++++++++++ 5 files changed, 153 insertions(+), 10 deletions(-) create mode 100644 decnet/profiler/behave_shell/_features/_emit.py create mode 100644 decnet/profiler/behave_shell/_features/motor.py create mode 100644 tests/profiler/behave_shell/test_motor_input_modality.py diff --git a/decnet/profiler/behave_shell/_features/__init__.py b/decnet/profiler/behave_shell/_features/__init__.py index 768a7fc6..68ba4548 100644 --- a/decnet/profiler/behave_shell/_features/__init__.py +++ b/decnet/profiler/behave_shell/_features/__init__.py @@ -3,9 +3,6 @@ Each entry takes a ``SessionContext`` and yields zero or more ``Observation`` instances. Adding a primitive = adding a function in a sibling module and appending it to ``FEATURES``. - -Step 0 ships an empty tuple — extract_session() is wired but emits -nothing until Step 2. """ from __future__ import annotations @@ -14,7 +11,10 @@ from typing import Callable, Iterable from decnet_behave_core.spec.envelope import Observation from decnet.profiler.behave_shell._ctx import SessionContext +from decnet.profiler.behave_shell._features.motor import input_modality FeatureFn = Callable[[SessionContext], Iterable[Observation]] -FEATURES: tuple[FeatureFn, ...] = () +FEATURES: tuple[FeatureFn, ...] = ( + input_modality, +) diff --git a/decnet/profiler/behave_shell/_features/_emit.py b/decnet/profiler/behave_shell/_features/_emit.py new file mode 100644 index 00000000..33c5fa51 --- /dev/null +++ b/decnet/profiler/behave_shell/_features/_emit.py @@ -0,0 +1,32 @@ +"""Helper for building registry-valid :class:`Observation` records. + +Every feature module would otherwise repeat the same Window / +source / evidence_ref boilerplate. 
This helper centralises it and is +the one place to reach when emission semantics change (e.g. when we +start parametrising windows on a per-primitive basis). +""" +from __future__ import annotations + +from typing import Any + +from decnet_behave_core.spec.envelope import Observation, Window + +from decnet.profiler.behave_shell._ctx import SessionContext + + +def make_observation( + ctx: SessionContext, + *, + primitive: str, + value: Any, + confidence: float, +) -> Observation: + """Build one :class:`Observation` for the whole-session window.""" + return Observation( + primitive=primitive, + value=value, + confidence=confidence, + window=Window(start_ts=ctx.t_start, end_ts=ctx.t_end), + source=ctx.source, + evidence_ref=ctx.evidence_ref, + ) diff --git a/decnet/profiler/behave_shell/_features/motor.py b/decnet/profiler/behave_shell/_features/motor.py new file mode 100644 index 00000000..b4e604a5 --- /dev/null +++ b/decnet/profiler/behave_shell/_features/motor.py @@ -0,0 +1,45 @@ +"""``motor.*`` feature functions. + +Step 2: ``motor.input_modality`` — typed / pasted / mixed. +Step 3: ``motor.paste_burst_rate`` — none / occasional / habitual. +""" +from __future__ import annotations + +from typing import Iterator + +from decnet_behave_core.spec.envelope import Observation + +from decnet.profiler.behave_shell._ctx import SessionContext +from decnet.profiler.behave_shell._features._emit import make_observation +from decnet.profiler.behave_shell._thresholds import ( + MODALITY_PASTED_MIN, + MODALITY_TYPED_MAX, +) + + +def input_modality(ctx: SessionContext) -> Iterator[Observation]: + """Emit ``motor.input_modality`` ∈ {typed, pasted, mixed}. + + Ratio of paste-class events to total inputs. Empty input → skip + emission entirely (the registry doesn't admit ``unknown`` here + and fabricating ``typed`` for a zero-input session is dishonest). 
+ """ + n = len(ctx.input_events) + if n == 0: + return + ratio = ctx.paste_event_count / n + if ratio >= MODALITY_PASTED_MIN: + modality = "pasted" + confidence = 0.75 + elif ratio <= MODALITY_TYPED_MAX: + modality = "typed" + confidence = 0.75 + else: + modality = "mixed" + confidence = 0.70 + yield make_observation( + ctx, + primitive="motor.input_modality", + value=modality, + confidence=confidence, + ) diff --git a/tests/profiler/behave_shell/test_extract_smoke.py b/tests/profiler/behave_shell/test_extract_smoke.py index 59298a13..9a02d25c 100644 --- a/tests/profiler/behave_shell/test_extract_smoke.py +++ b/tests/profiler/behave_shell/test_extract_smoke.py @@ -24,8 +24,11 @@ from decnet.profiler.behave_shell._features import FEATURES from decnet.profiler.behave_shell._parse import AsciinemaEvent -def test_features_tuple_is_empty_at_step_0() -> None: - assert FEATURES == () +def test_features_tuple_is_populated() -> None: + # Step 2+: at least one feature is registered. Exact membership is + # asserted in per-feature tests; this test only pins "the registry + # is non-empty" so the empty-FEATURES regression doesn't sneak back. 
+ assert len(FEATURES) >= 1 def test_default_source_is_canonical_path() -> None: @@ -87,8 +90,7 @@ def test_extract_session_explicit_evidence_ref_overrides_default() -> None: assert ctx.evidence_ref == "shard:/var/log/d/sess-x.cast" -def test_extract_session_with_features_still_empty() -> None: - """Until Step 2 lands, even a populated stream emits nothing.""" - events: list[AsciinemaEvent] = [(t / 10.0, "i", c) for t, c in enumerate("hello\r")] - out = list(extract_session(events, sid="sess-features-empty")) +def test_extract_session_zero_inputs_yields_nothing() -> None: + """No input events → no feature emits (input_modality skips on empty).""" + out = list(extract_session([(0.0, "o", "hi\r\n")], sid="sess-no-input")) assert out == [] diff --git a/tests/profiler/behave_shell/test_motor_input_modality.py b/tests/profiler/behave_shell/test_motor_input_modality.py new file mode 100644 index 00000000..fc341eba --- /dev/null +++ b/tests/profiler/behave_shell/test_motor_input_modality.py @@ -0,0 +1,64 @@ +"""Step 2: ``motor.input_modality`` — typed / pasted / mixed.""" +from __future__ import annotations + +from decnet.profiler.behave_shell import extract_session +from decnet.profiler.behave_shell._parse import AsciinemaEvent + + +def _by_primitive(observations: list, primitive: str): + return [o for o in observations if o.primitive == primitive] + + +def test_pure_typed_session_emits_typed() -> None: + events: list[AsciinemaEvent] = [(i * 0.1, "i", c) for i, c in enumerate("ls\r")] + out = list(extract_session(events, sid="sess-typed")) + obs = _by_primitive(out, "motor.input_modality") + assert len(obs) == 1 + assert obs[0].value == "typed" + assert obs[0].confidence == 0.75 + + +def test_pure_pasted_session_emits_pasted() -> None: + # Three large input events, no typing + events: list[AsciinemaEvent] = [ + (0.0, "i", "echo first paste\r"), + (1.0, "i", "echo second paste\r"), + (2.0, "i", "echo third paste\r"), + ] + out = list(extract_session(events, 
sid="sess-pasted")) + obs = _by_primitive(out, "motor.input_modality") + assert len(obs) == 1 + assert obs[0].value == "pasted" + + +def test_mixed_session_emits_mixed() -> None: + # 1 paste event + 10 single-char typed events → ratio 1/11 ≈ 0.09 → in + # between the typed (≤0.05) and pasted (≥0.40) thresholds → mixed + events: list[AsciinemaEvent] = [(0.0, "i", "echo hello\r")] + events += [(0.5 + i * 0.1, "i", c) for i, c in enumerate("ls -la\rps\r")] + out = list(extract_session(events, sid="sess-mixed")) + obs = _by_primitive(out, "motor.input_modality") + assert len(obs) == 1 + assert obs[0].value == "mixed" + + +def test_zero_input_session_emits_nothing_for_modality() -> None: + # Output-only session: no honest answer, so we don't emit. + events: list[AsciinemaEvent] = [(0.0, "o", "welcome\r\n")] + out = list(extract_session(events, sid="sess-empty-input")) + assert _by_primitive(out, "motor.input_modality") == [] + + +def test_observation_envelope_fields_are_populated() -> None: + events: list[AsciinemaEvent] = [(0.0, "i", "echo paste paste\r")] + out = list(extract_session( + events, sid="sess-env", evidence_ref="shard:/blob/sess-env", + )) + obs = _by_primitive(out, "motor.input_modality")[0] + assert obs.source == "decnet/profiler/behave_shell/extract.py" + assert obs.evidence_ref == "shard:/blob/sess-env" + assert obs.window.start_ts == 0.0 + assert obs.window.end_ts == 0.0 + # envelope auto-populates id / ts / v + assert obs.id and len(obs.id) > 0 + assert obs.v == 1