From 4fc980e9683fa2a0d35a1f7ca862075857bd046d Mon Sep 17 00:00:00 2001 From: anti Date: Sun, 3 May 2026 23:33:07 -0400 Subject: [PATCH] feat(profiler/behave_shell): emit motor.shell_mastery.shortcut_usage --- .../behave_shell/_features/__init__.py | 2 + .../profiler/behave_shell/_features/motor.py | 50 ++++++++ decnet/profiler/behave_shell/_thresholds.py | 12 ++ development/BEHAVE-EXTRACTOR.md | 2 +- .../behave_shell/test_calibration_grid.py | 1 + .../behave_shell/test_motor_shortcut_usage.py | 114 ++++++++++++++++++ 6 files changed, 180 insertions(+), 1 deletion(-) create mode 100644 tests/profiler/behave_shell/test_motor_shortcut_usage.py diff --git a/decnet/profiler/behave_shell/_features/__init__.py b/decnet/profiler/behave_shell/_features/__init__.py index b3117aad..7f12618c 100644 --- a/decnet/profiler/behave_shell/_features/__init__.py +++ b/decnet/profiler/behave_shell/_features/__init__.py @@ -24,6 +24,7 @@ from decnet.profiler.behave_shell._features.motor import ( keystroke_cadence, motor_stability, paste_burst_rate, + shortcut_usage, tab_completion, ) @@ -37,6 +38,7 @@ FEATURES: tuple[FeatureFn, ...] 
= ( error_correction, command_chunking, tab_completion, + shortcut_usage, inter_command_latency_class, command_branch_diversity, feedback_loop_engagement, diff --git a/decnet/profiler/behave_shell/_features/motor.py b/decnet/profiler/behave_shell/_features/motor.py index 7653ce70..e62ddfd0 100644 --- a/decnet/profiler/behave_shell/_features/motor.py +++ b/decnet/profiler/behave_shell/_features/motor.py @@ -28,6 +28,8 @@ from decnet.profiler.behave_shell._thresholds import ( PASTE_RATE_OCCASIONAL_MIN, SHELL_MASTERY_BOUNDARY_BAND, SHELL_MASTERY_MIN_COMMANDS, + SHORTCUT_USAGE_HEAVY_MIN, + SHORTCUT_USAGE_MODERATE_MIN, TAB_COMPLETION_HABITUAL_MIN, TAB_COMPLETION_OCCASIONAL_MAX, TREMOR_FAST_FLOOR_S, @@ -316,3 +318,51 @@ def tab_completion(ctx: SessionContext) -> Iterator[Observation]: value=value, confidence=confidence, ) + + +def shortcut_usage(ctx: SessionContext) -> Iterator[Observation]: + """Emit ``motor.shell_mastery.shortcut_usage`` ∈ {none, moderate, heavy}. + + Metric: total readline ctrl-byte keystrokes (the seven in + :data:`SHORTCUT_CTRL_BYTES`) divided by command count. Registry + buckets are qualitative; v0.1 thresholds are pinned for corpus + calibration. Heavy users tend to be tmux/zsh/bash power operators + who edit lines in place rather than retyping. + + Confidence: + * < ``SHELL_MASTERY_MIN_COMMANDS`` → 0.40. + * Within ±10% of either bucket boundary → 0.55. + * Otherwise → 0.65 (lower than tab_completion: thresholds are + not yet corpus-calibrated, mirrors ``motor_stability`` posture). + + Skips emission when the session has no commands at all. 
+ """ + n = len(ctx.commands) + if n == 0: + return + total_shortcuts = sum(c.shortcut_count for c in ctx.commands) + rate = total_shortcuts / n + + if total_shortcuts == 0 or rate < SHORTCUT_USAGE_MODERATE_MIN: + value = "none" + elif rate < SHORTCUT_USAGE_HEAVY_MIN: + value = "moderate" + else: + value = "heavy" + + if n < SHELL_MASTERY_MIN_COMMANDS: + confidence = 0.40 + elif ( + _near(rate, SHORTCUT_USAGE_MODERATE_MIN) + or _near(rate, SHORTCUT_USAGE_HEAVY_MIN) + ): + confidence = 0.55 + else: + confidence = 0.65 + + yield make_observation( + ctx, + primitive="motor.shell_mastery.shortcut_usage", + value=value, + confidence=confidence, + ) diff --git a/decnet/profiler/behave_shell/_thresholds.py b/decnet/profiler/behave_shell/_thresholds.py index d44562c5..22c1ab92 100644 --- a/decnet/profiler/behave_shell/_thresholds.py +++ b/decnet/profiler/behave_shell/_thresholds.py @@ -130,6 +130,18 @@ SHORTCUT_CTRL_BYTES: frozenset[str] = frozenset({ TAB_COMPLETION_OCCASIONAL_MAX: float = 0.30 TAB_COMPLETION_HABITUAL_MIN: float = 0.50 +# motor.shell_mastery.shortcut_usage — total readline ctrl-byte +# keystrokes per command. Registry buckets are qualitative +# (``none / moderate / heavy``); v0.1 thresholds are best-guesses +# pinned for five-class corpus calibration. Re-tune once HUMAN / +# YOU-sim / LW-sim / CLAUDE-FF / CLAUDE-CL data lands. +# 0/cmd → none +# <0.05/cmd → none (counted shortcuts but rare; rounds down) +# 0.05-0.30 → moderate +# ≥0.30/cmd → heavy +SHORTCUT_USAGE_MODERATE_MIN: float = 0.05 +SHORTCUT_USAGE_HEAVY_MIN: float = 0.30 + # Sample-size floor below which Phase C primitives drop confidence to # 0.40 (sample-size honesty). Mirrors MIN_COMMANDS_FOR_FULL_CONFIDENCE # but is named separately so a future tune can move them independently. 
diff --git a/development/BEHAVE-EXTRACTOR.md b/development/BEHAVE-EXTRACTOR.md index 08228fc7..97276b82 100644 --- a/development/BEHAVE-EXTRACTOR.md +++ b/development/BEHAVE-EXTRACTOR.md @@ -647,7 +647,7 @@ unchecked = no v0 tag.** ### Phase C — `motor.shell_mastery.*` - [x] C.1 `motor.shell_mastery.tab_completion` -- [ ] C.2 `motor.shell_mastery.shortcut_usage` +- [x] C.2 `motor.shell_mastery.shortcut_usage` - [ ] C.3 `motor.shell_mastery.pipe_chaining_depth` ### Phase D — `cognitive.*` completion diff --git a/tests/profiler/behave_shell/test_calibration_grid.py b/tests/profiler/behave_shell/test_calibration_grid.py index 064158f3..f1165705 100644 --- a/tests/profiler/behave_shell/test_calibration_grid.py +++ b/tests/profiler/behave_shell/test_calibration_grid.py @@ -46,6 +46,7 @@ PHASE_ABC_PRIMITIVES: frozenset[str] = frozenset({ "motor.command_chunking", # Phase C — motor.shell_mastery.* (lands one primitive per commit) "motor.shell_mastery.tab_completion", + "motor.shell_mastery.shortcut_usage", }) diff --git a/tests/profiler/behave_shell/test_motor_shortcut_usage.py b/tests/profiler/behave_shell/test_motor_shortcut_usage.py new file mode 100644 index 00000000..5c5462ec --- /dev/null +++ b/tests/profiler/behave_shell/test_motor_shortcut_usage.py @@ -0,0 +1,114 @@ +"""Step C.2: ``motor.shell_mastery.shortcut_usage``.""" +from __future__ import annotations + +from decnet.profiler.behave_shell import extract_session +from decnet.profiler.behave_shell._ctx import build_session_context +from decnet.profiler.behave_shell._parse import AsciinemaEvent + +PRIMITIVE = "motor.shell_mastery.shortcut_usage" + +# Three of the seven readline shortcuts; using distinct codes ensures +# we are counting bytes, not just one specific char. 
+CTRL_A = "\x01" +CTRL_E = "\x05" +CTRL_R = "\x12" + + +def _of(observations: list, primitive: str): + obs = [o for o in observations if o.primitive == primitive] + assert len(obs) == 1, f"expected exactly one {primitive}, got {len(obs)}" + return obs[0] + + +def _command(t0: float, body: str) -> list[AsciinemaEvent]: + events: list[AsciinemaEvent] = [] + t = t0 + for c in body: + events.append((t, "i", c)) + t += 0.05 + events.append((t, "i", "\r")) + return events + + +def _session(bodies: list[str], gap: float = 1.0) -> list[AsciinemaEvent]: + events: list[AsciinemaEvent] = [] + t = 0.0 + for body in bodies: + events.extend(_command(t, body)) + t = events[-1][0] + gap + return events + + +def test_no_commands_no_emission() -> None: + out = list(extract_session([(0.0, "i", "ls")], sid="sc-empty")) + assert [o for o in out if o.primitive == PRIMITIVE] == [] + + +def test_zero_shortcuts_emit_none() -> None: + out = list(extract_session(_session(["ls", "pwd", "id", "uname", "whoami"]), + sid="sc-none")) + obs = _of(out, PRIMITIVE) + assert obs.value == "none" + assert obs.confidence == 0.65 + + +def test_moderate_rate_emits_moderate() -> None: + # 2 ctrl bytes across 10 commands = 0.20/cmd → moderate; not near + # either of the 0.05 / 0.30 boundaries (>10%). + bodies = [f"ls{CTRL_A}"] * 2 + ["pwd"] * 8 + out = list(extract_session(_session(bodies), sid="sc-moderate")) + obs = _of(out, PRIMITIVE) + assert obs.value == "moderate" + assert obs.confidence == 0.65 + + +def test_heavy_rate_emits_heavy() -> None: + # 6 ctrl bytes across 5 commands = 1.2/cmd → heavy. + bodies = [f"ls{CTRL_A}{CTRL_E}", f"vi{CTRL_R}f", f"cd{CTRL_A}"] + [ + f"cat{CTRL_R}", f"ps{CTRL_E}" + ] + out = list(extract_session(_session(bodies), sid="sc-heavy")) + obs = _of(out, PRIMITIVE) + assert obs.value == "heavy" + + +def test_sub_threshold_rate_rounds_to_none() -> None: + # 1 ctrl byte across 50 commands = 0.02/cmd, below MODERATE_MIN. 
+ bodies = [f"ls{CTRL_A}"] + ["pwd"] * 49 + out = list(extract_session(_session(bodies, gap=0.5), sid="sc-rounddown")) + obs = _of(out, PRIMITIVE) + assert obs.value == "none" + + +def test_near_boundary_drops_confidence() -> None: + # 3 ctrl bytes across 10 commands = 0.30/cmd — exactly the heavy + # boundary. Confidence drops. + bodies = [f"ls{CTRL_A}{CTRL_E}{CTRL_R}"] + ["pwd"] * 9 + out = list(extract_session(_session(bodies), sid="sc-boundary")) + obs = _of(out, PRIMITIVE) + assert obs.confidence == 0.55 + + +def test_few_commands_drops_confidence() -> None: + out = list(extract_session(_session(["ls", "pwd", "id", "exit"]), + sid="sc-low-n")) + obs = _of(out, PRIMITIVE) + assert obs.confidence == 0.40 + + +def test_segmentation_populates_shortcut_count() -> None: + """Multiple distinct ctrl bytes inside one command count once each; + counters reset on the command boundary.""" + events = _command(0.0, f"ls{CTRL_A}{CTRL_E}{CTRL_R}") + _command(5.0, "pwd") + ctx = build_session_context(events, sid="seg-sc", source="t") + assert len(ctx.commands) == 2 + assert ctx.commands[0].shortcut_count == 3 + assert ctx.commands[1].shortcut_count == 0 + + +def test_non_shortcut_ctrl_bytes_not_counted() -> None: + """Only the seven pinned ctrl bytes count. ^C (0x03) / ^L (0x0c) + must not bump shortcut_count.""" + events = _command(0.0, "ls\x03\x0c") + ctx = build_session_context(events, sid="seg-sc-other", source="t") + assert ctx.commands[0].shortcut_count == 0