BEHAVE-EXTRACTOR.md Phase B Step B.3. Replaces the prototype's
two-line "0 vs >0 backspaces" placeholder with a backspace-timing
classifier that honours the registry's full vocabulary.
* SessionContext gains backspace_count, backspace_iats (IAT from
each backspace back to the preceding non-backspace input event),
and kill_line_count (^U / ^W). Built by _scan_correction_signals,
which retains only counts and timing aggregates — no character
data leaves the helper, in line with the BEHAVE PII discipline.
* _features/motor.py:error_correction(ctx) emits one Observation
in {immediate, deferred, absent, route_around}.
- 0 backspaces + ≥1 ^U/^W → route_around (rewrite, not correct)
- 0 backspaces + 0 kill-lines → absent
- backspaces with median IAT ≤ 500 ms → immediate
- slower → deferred
Confidence: immediate 0.65, deferred 0.55, absent 0.65, route_around 0.55.
* < 3 inputs → skip emit.
* Calibration grid widened to include motor.error_correction;
green across all five shards.
Tests cover all four buckets, the < 3 inputs skip, and the PII
regression (raw command body never appears in the serialised
observation).
208 lines
6.8 KiB
Python
208 lines
6.8 KiB
Python
"""``motor.*`` feature functions.
|
|
|
|
Step 2: ``motor.input_modality`` — typed / pasted / mixed.
|
|
Step 3: ``motor.paste_burst_rate`` — none / occasional / habitual.
|
|
Step B.1: ``motor.keystroke_cadence`` — steady / bursty / hunt_and_peck / machine.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import statistics
|
|
from itertools import chain
|
|
from typing import Iterator
|
|
|
|
from decnet_behave_core.spec.envelope import Observation
|
|
|
|
from decnet.profiler.behave_shell._ctx import SessionContext
|
|
from decnet.profiler.behave_shell._features._emit import make_observation
|
|
from decnet.profiler.behave_shell._thresholds import (
|
|
BACKSPACE_IMMEDIATE_MAX_S,
|
|
CV_BURSTY_MAX,
|
|
CV_MACHINE_MAX,
|
|
CV_STEADY_MAX,
|
|
IKI_MACHINE_MAX_S,
|
|
MIN_INPUTS_FOR_CADENCE,
|
|
MODALITY_PASTED_MIN,
|
|
MODALITY_TYPED_MAX,
|
|
PASTE_RATE_HABITUAL_MIN,
|
|
PASTE_RATE_OCCASIONAL_MIN,
|
|
TREMOR_FAST_FLOOR_S,
|
|
TREMOR_RATE_MIN,
|
|
)
|
|
|
|
|
|
def input_modality(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``motor.input_modality`` ∈ {typed, pasted, mixed}.

    The classification is driven by the ratio of paste-class events
    (``ctx.paste_event_count``) to the total number of input events
    (``len(ctx.input_events)``):

    * ratio ≥ ``MODALITY_PASTED_MIN`` → ``pasted`` (confidence 0.75)
    * ratio ≤ ``MODALITY_TYPED_MAX``  → ``typed``  (confidence 0.75)
    * anything in between             → ``mixed``  (confidence 0.70)

    A session with no input events yields nothing at all: the registry
    doesn't admit ``unknown`` for this primitive, and fabricating
    ``typed`` for a zero-input session would be dishonest.

    Args:
        ctx: Session context; only ``input_events`` and
            ``paste_event_count`` are read here.

    Yields:
        At most one observation, built by ``make_observation``.
    """
    total_inputs = len(ctx.input_events)
    if total_inputs == 0:
        # Nothing to classify — skip emission rather than guess.
        return

    paste_ratio = ctx.paste_event_count / total_inputs
    if paste_ratio >= MODALITY_PASTED_MIN:
        modality, confidence = "pasted", 0.75
    elif paste_ratio <= MODALITY_TYPED_MAX:
        modality, confidence = "typed", 0.75
    else:
        # Between the two thresholds: neither channel dominates.
        modality, confidence = "mixed", 0.70

    yield make_observation(
        ctx,
        primitive="motor.input_modality",
        value=modality,
        confidence=confidence,
    )
|
|
|
|
|
|
def paste_burst_rate(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``motor.paste_burst_rate`` ∈ {none, occasional, habitual}.

    Uses the same paste-event ratio as ``input_modality``
    (``ctx.paste_event_count / len(ctx.input_events)``) but with
    coarser buckets. This primitive is the *habit* signal (does the
    operator reach for paste at all?), whereas ``input_modality`` is
    the dominant-channel signal (is the session paste-driven overall?).
    Per the original notes it splits YOU-sim from
    LW/CLAUDE-FF/CLAUDE-CL — LLM-driven sessions paste habitually,
    real humans don't.

    * ratio ≥ ``PASTE_RATE_HABITUAL_MIN``   → ``habitual``   (0.80)
    * ratio ≥ ``PASTE_RATE_OCCASIONAL_MIN`` → ``occasional`` (0.70)
    * otherwise                             → ``none``       (0.70)

    Args:
        ctx: Session context; only ``input_events`` and
            ``paste_event_count`` are read here.

    Yields:
        At most one observation; nothing for a zero-input session.
    """
    total_inputs = len(ctx.input_events)
    if total_inputs == 0:
        # No inputs → no ratio → skip emission.
        return

    paste_ratio = ctx.paste_event_count / total_inputs
    # Thresholds are checked from the highest bucket downwards.
    if paste_ratio >= PASTE_RATE_HABITUAL_MIN:
        level, confidence = "habitual", 0.80
    elif paste_ratio >= PASTE_RATE_OCCASIONAL_MIN:
        level, confidence = "occasional", 0.70
    else:
        level, confidence = "none", 0.70

    yield make_observation(
        ctx,
        primitive="motor.paste_burst_rate",
        value=level,
        confidence=confidence,
    )
|
|
|
|
|
|
def keystroke_cadence(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``motor.keystroke_cadence`` ∈ {steady, bursty, hunt_and_peck, machine}.

    The signal is the median coefficient of variation (population
    stdev / mean) of the inter-arrival times inside each entry of
    ``ctx.typing_bursts``. Per the original notes, bursts are split
    upstream at gaps > ``IKI_THINK_MAX_S`` so think-pauses between
    commands don't inflate the variance.

    Buckets, checked in order:

    * mean IKI < ``IKI_MACHINE_MAX_S`` and CV < ``CV_MACHINE_MAX``
      → ``machine`` (0.85)
    * CV < ``CV_STEADY_MAX`` → ``steady`` (0.70)
    * CV < ``CV_BURSTY_MAX`` → ``bursty`` (0.65)
    * otherwise              → ``hunt_and_peck`` (0.60)

    Emission is skipped when there are no typing bursts (e.g. a
    pasted-only session), when the session has fewer than
    ``MIN_INPUTS_FOR_CADENCE`` input events, or when no burst has a
    positive mean — no honest cadence is available in those cases.

    v0.1 emits only the burst-CV variant. The prototype's NAIVE
    session-CV variant (lower confidence, second emission per
    primitive) is parked for v0.2.

    Args:
        ctx: Session context; ``input_events`` and ``typing_bursts``
            are read here.

    Yields:
        At most one observation, built by ``make_observation``.
    """
    # Both guards are side-effect free, so they are evaluated together.
    if not ctx.typing_bursts or len(ctx.input_events) < MIN_INPUTS_FOR_CADENCE:
        return

    burst_cvs: list[float] = []
    for burst in ctx.typing_bursts:
        if len(burst) == 0:
            # statistics.fmean / pstdev raise StatisticsError on empty
            # data; an empty burst carries no timing information anyway.
            continue
        burst_mean = statistics.fmean(burst)
        if burst_mean > 0:
            # CV is undefined for a zero mean, so such bursts are ignored.
            burst_cvs.append(statistics.pstdev(burst) / burst_mean)
    if not burst_cvs:
        return

    cv = statistics.median(burst_cvs)
    # Mean inter-key interval across every burst, pooled.
    mean_iki = statistics.fmean(chain.from_iterable(ctx.typing_bursts))

    if mean_iki < IKI_MACHINE_MAX_S and cv < CV_MACHINE_MAX:
        value, confidence = "machine", 0.85
    elif cv < CV_STEADY_MAX:
        value, confidence = "steady", 0.70
    elif cv < CV_BURSTY_MAX:
        value, confidence = "bursty", 0.65
    else:
        value, confidence = "hunt_and_peck", 0.60

    yield make_observation(
        ctx,
        primitive="motor.keystroke_cadence",
        value=value,
        confidence=confidence,
    )
|
|
|
|
|
|
def motor_stability(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``motor.motor_stability`` ∈ {steady, variable, tremor}.

    First-pass tremor signal: the fraction of within-typing-burst IATs
    that fall below ``TREMOR_FAST_FLOOR_S``. When that fraction reaches
    ``TREMOR_RATE_MIN`` the session is flagged ``tremor`` (0.65) —
    a high sub-floor rate points at double-press / motor twitch /
    stuck-key. Otherwise the same median burst-CV used by
    ``keystroke_cadence`` decides between ``steady`` (CV <
    ``CV_STEADY_MAX``, 0.70) and ``variable`` (0.60).

    NOTE(review): the original docstring described the floor as
    "30 ms" while also arguing from "sustained sub-50 ms IATs";
    confirm the intended value against ``_thresholds``.

    Emission is skipped when there are no typing bursts or fewer than
    five IAT samples in total.

    Args:
        ctx: Session context; only ``typing_bursts`` is read here.

    Yields:
        At most one observation, built by ``make_observation``.
    """
    if not ctx.typing_bursts:
        return

    all_iats = list(chain.from_iterable(ctx.typing_bursts))
    if len(all_iats) < 5:
        # Too few samples for a meaningful rate.
        return

    sub_floor = sum(1 for iat in all_iats if iat < TREMOR_FAST_FLOOR_S)
    fast_rate = sub_floor / len(all_iats)

    if fast_rate >= TREMOR_RATE_MIN:
        value, confidence = "tremor", 0.65
    else:
        burst_cvs: list[float] = []
        for burst in ctx.typing_bursts:
            if len(burst) == 0:
                # statistics.fmean / pstdev raise StatisticsError on
                # empty data; skip bursts with no samples.
                continue
            burst_mean = statistics.fmean(burst)
            if burst_mean > 0:
                burst_cvs.append(statistics.pstdev(burst) / burst_mean)
        # With no usable burst the CV defaults to 0.0, i.e. ``steady``.
        cv = statistics.median(burst_cvs) if burst_cvs else 0.0
        if cv < CV_STEADY_MAX:
            value, confidence = "steady", 0.70
        else:
            value, confidence = "variable", 0.60

    yield make_observation(
        ctx,
        primitive="motor.motor_stability",
        value=value,
        confidence=confidence,
    )
|
|
|
|
|
|
def error_correction(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``motor.error_correction`` ∈ {immediate, deferred, absent, route_around}.

    Backspace timing relative to the preceding non-backspace key:

    * 0 backspaces + ≥1 ^U/^W → ``route_around`` (0.55) — the operator
      killed the line and rewrote rather than correcting in place.
    * 0 backspaces + 0 ^U/^W → ``absent`` (0.65) — no correction
      observed.
    * Backspaces with median IAT ≤ ``BACKSPACE_IMMEDIATE_MAX_S``
      (500 ms per the original notes) → ``immediate`` (0.65) — caught
      the typo mid-keystroke.
    * Slower → ``deferred`` (0.55) — paused, noticed, then went back.

    If backspaces were counted but ``ctx.backspace_iats`` is empty, the
    median is treated as infinite, so the result is ``deferred``.

    < 3 input events → skip emission.

    Args:
        ctx: Session context; ``input_events``, ``backspace_count``,
            ``kill_line_count`` and ``backspace_iats`` are read here.

    Yields:
        At most one observation, built by ``make_observation``.
    """
    if len(ctx.input_events) < 3:
        return

    if ctx.backspace_count == 0:
        if ctx.kill_line_count > 0:
            value, confidence = "route_around", 0.55
        else:
            value, confidence = "absent", 0.65
    else:
        # No timing sample available → infinite median → ``deferred``.
        if ctx.backspace_iats:
            median_iat = statistics.median(ctx.backspace_iats)
        else:
            median_iat = float("inf")
        if median_iat <= BACKSPACE_IMMEDIATE_MAX_S:
            value, confidence = "immediate", 0.65
        else:
            value, confidence = "deferred", 0.55

    yield make_observation(
        ctx,
        primitive="motor.error_correction",
        value=value,
        confidence=confidence,
    )
|