feat(profiler/behave_shell): emit motor.shell_mastery.tab_completion

2026-05-03 23:31:20 -04:00
parent 771944830a
commit a077cf67c8
8 changed files with 246 additions and 8 deletions
--- a/decnet/profiler/behave_shell/_ctx.py
+++ b/decnet/profiler/behave_shell/_ctx.py
@@ -23,6 +23,7 @@ from decnet.profiler.behave_shell._thresholds import (
    IKI_THINK_MAX_S,
    PASTE_BURST_MAX_IAT_S,
    PASTE_MIN_CHARS_PER_EVENT,
+    SHORTCUT_CTRL_BYTES,
 )


@@ -164,14 +165,26 @@ def _split_typing_bursts(iats: tuple[float, ...]) -> tuple[tuple[float, ...], ..
 def _segment_commands(inputs: list[AsciinemaEvent]) -> tuple[Command, ...]:
    """Walk input events, splitting on ``\\r`` / ``\\n`` into commands.

-    PII discipline: only the first whitespace-delimited token is
-    retained, and only as a sha256 hash. Buffer contents are dropped
-    on every command boundary; an unterminated trailing buffer (no
-    final newline) yields no command.
+    Retains only the first whitespace-delimited token as a sha256 hash
+    plus three integer counters needed for the Phase C
+    ``motor.shell_mastery.*`` primitives:
+
+    * ``tab_count``      — ``\\t`` (0x09) keystrokes in the command
+    * ``shortcut_count`` — readline control bytes from
+      :data:`SHORTCUT_CTRL_BYTES`
+    * ``pipe_count``     — ``|`` characters in the command (counted on
+      every byte; pasted pipelines still indicate pipeline fluency the
+      operator chose to execute)
+
+    Buffer contents are dropped on every command boundary; an
+    unterminated trailing buffer (no final newline) yields no command.
    """
    cmds: list[Command] = []
    buf_chars: list[str] = []
    buf_start_ts: float | None = None
+    tab_count = 0
+    shortcut_count = 0
+    pipe_count = 0

    for t, _kind, data in inputs:
        for c in data:
@@ -183,13 +196,25 @@ def _segment_commands(inputs: list[AsciinemaEvent]) -> tuple[Command, ...]:
                        start_ts=buf_start_ts if buf_start_ts is not None else t,
                        end_ts=t,
                        first_token_hash=hash_token(first_token),
+                        tab_count=tab_count,
+                        shortcut_count=shortcut_count,
+                        pipe_count=pipe_count,
                    ))
                buf_chars = []
                buf_start_ts = None
+                tab_count = 0
+                shortcut_count = 0
+                pipe_count = 0
            else:
                if not buf_chars:
                    buf_start_ts = t
                buf_chars.append(c)
+                if c == "\t":
+                    tab_count += 1
+                elif c == "|":
+                    pipe_count += 1
+                elif c in SHORTCUT_CTRL_BYTES:
+                    shortcut_count += 1

    return tuple(cmds)

--- a/decnet/profiler/behave_shell/_features/__init__.py
+++ b/decnet/profiler/behave_shell/_features/__init__.py
@@ -24,6 +24,7 @@ from decnet.profiler.behave_shell._features.motor import (
    keystroke_cadence,
    motor_stability,
    paste_burst_rate,
+    tab_completion,
 )

 FeatureFn = Callable[[SessionContext], Iterable[Observation]]
@@ -35,6 +36,7 @@ FEATURES: tuple[FeatureFn, ...] = (
    motor_stability,
    error_correction,
    command_chunking,
+    tab_completion,
    inter_command_latency_class,
    command_branch_diversity,
    feedback_loop_engagement,
--- a/decnet/profiler/behave_shell/_features/motor.py
+++ b/decnet/profiler/behave_shell/_features/motor.py
@@ -26,11 +26,25 @@ from decnet.profiler.behave_shell._thresholds import (
    MODALITY_TYPED_MAX,
    PASTE_RATE_HABITUAL_MIN,
    PASTE_RATE_OCCASIONAL_MIN,
+    SHELL_MASTERY_BOUNDARY_BAND,
+    SHELL_MASTERY_MIN_COMMANDS,
+    TAB_COMPLETION_HABITUAL_MIN,
+    TAB_COMPLETION_OCCASIONAL_MAX,
    TREMOR_FAST_FLOOR_S,
    TREMOR_RATE_MIN,
 )


+def _near(value: float, boundary: float) -> bool:
+    """Report whether ``value`` sits on the fence at ``boundary``.
+
+    The band is relative: ``value`` counts as near when its distance
+    from ``boundary`` is at most ``SHELL_MASTERY_BOUNDARY_BAND`` times
+    the boundary's magnitude. Phase C uses this to drop confidence when
+    a measurement sits on a bucket fence.
+
+    A zero boundary has no relative scale, so the band is applied as an
+    absolute distance from zero instead.
+    """
+    scale = abs(boundary)
+    if scale == 0:
+        return abs(value) <= SHELL_MASTERY_BOUNDARY_BAND
+    # Divide by the magnitude rather than the signed boundary: with a
+    # negative boundary the quotient would be negative and every value
+    # would be reported as near.
+    return abs(value - boundary) / scale <= SHELL_MASTERY_BOUNDARY_BAND
+
+
 def input_modality(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``motor.input_modality`` ∈ {typed, pasted, mixed}.

@@ -252,3 +266,53 @@ def command_chunking(ctx: SessionContext) -> Iterator[Observation]:
        value=value,
        confidence=confidence,
    )
+
+
+def tab_completion(ctx: SessionContext) -> Iterator[Observation]:
+    """Emit ``motor.shell_mastery.tab_completion`` ∈ {none, occasional, habitual}.
+
+    The metric is the share of ``ctx.commands`` whose ``tab_count`` is
+    positive, i.e. commands that contain at least one ``\\t`` byte.
+
+    Value buckets:
+    * ratio of exactly 0 → ``none``.
+    * below ``TAB_COMPLETION_OCCASIONAL_MAX``, or in the span from there
+      up to ``TAB_COMPLETION_HABITUAL_MIN`` (rounded down) → ``occasional``.
+    * otherwise → ``habitual``.
+
+    Confidence:
+    * fewer than ``SHELL_MASTERY_MIN_COMMANDS`` commands → 0.40.
+    * ratio within ``SHELL_MASTERY_BOUNDARY_BAND`` of either bucket
+      boundary, measured relative to that boundary → 0.55.
+    * otherwise → 0.75.
+
+    A session without any commands yields no observation at all.
+    """
+    total = len(ctx.commands)
+    if total == 0:
+        return
+
+    tabbed = sum(cmd.tab_count > 0 for cmd in ctx.commands)
+    ratio = tabbed / total
+
+    if ratio == 0.0:
+        value = "none"
+    elif (
+        ratio < TAB_COMPLETION_OCCASIONAL_MAX
+        # Registry's own gap (30%-<50%) — round down rather than up.
+        or ratio < TAB_COMPLETION_HABITUAL_MIN
+    ):
+        value = "occasional"
+    else:
+        value = "habitual"
+
+    if total < SHELL_MASTERY_MIN_COMMANDS:
+        # Too few commands to trust the ratio, whatever its value.
+        confidence = 0.40
+    else:
+        fences = (TAB_COMPLETION_OCCASIONAL_MAX, TAB_COMPLETION_HABITUAL_MIN)
+        on_fence = any(_near(ratio, fence) for fence in fences)
+        confidence = 0.55 if on_fence else 0.75
+
+    yield make_observation(
+        ctx,
+        primitive="motor.shell_mastery.tab_completion",
+        value=value,
+        confidence=confidence,
+    )
--- a/decnet/profiler/behave_shell/_parse.py
+++ b/decnet/profiler/behave_shell/_parse.py
@@ -47,11 +47,20 @@ class Command:
    ``end_ts`` is the timestamp of the ``\\r`` / ``\\n`` that
    terminated the command; ``start_ts`` is the first character typed
    or pasted into it.
+
+    ``tab_count`` / ``shortcut_count`` / ``pipe_count`` are integer
+    counters populated by the context builder during the per-command
+    byte sweep. They feed the ``motor.shell_mastery.*`` primitives
+    (Phase C). The raw bytes themselves are read once during the
+    sweep and discarded — only the counters are retained.
    """

    start_ts: float
    end_ts: float
    first_token_hash: str
+    tab_count: int = 0
+    shortcut_count: int = 0
+    pipe_count: int = 0


 def hash_token(token: str) -> str:
--- a/decnet/profiler/behave_shell/_thresholds.py
+++ b/decnet/profiler/behave_shell/_thresholds.py
@@ -109,3 +109,33 @@ BACKSPACE_IMMEDIATE_MAX_S: float = 0.50
 # Median CV of within-command IATs. Below this → fluent (steady within
 # each command); above → fragmented (operator pauses mid-command).
 CMD_CHUNKING_FLUENT_CV_MAX: float = 0.50
+
+# ── motor.shell_mastery.* (Phase C) ─────────────────────────────────────────
+# Readline control bytes counted toward ``shortcut_usage``. The seven
+# pinned by BEHAVE-EXTRACTOR.md §Phase C (line 472):
+#   ^A start-of-line  ^E end-of-line  ^W kill-prev-word
+#   ^U kill-line      ^R reverse-i-search  ^B back-char  ^F forward-char
+# v0.2 may extend to ^K/^Y/^L/^D/^P/^N once corpus calibration justifies it.
+# Note: ^U / ^W also feed ``motor.error_correction`` (Step B.3) via the
+# ``kill_line_count`` channel — these are independent measurements over
+# the same byte stream, not double-counting.
+SHORTCUT_CTRL_BYTES: frozenset[str] = frozenset({
+    "\x01", "\x05", "\x17", "\x15", "\x12", "\x02", "\x06",
+})
+
+# motor.shell_mastery.tab_completion — fraction of commands containing
+# at least one ``\t`` keystroke. Registry buckets per BEHAVE-EXTRACTOR.md
+# line 471: ``none`` (0%), ``occasional`` (<30%), ``habitual`` (≥50%).
+# The 30%-50% gap rounds down to ``occasional`` — the registry's own gap.
+TAB_COMPLETION_OCCASIONAL_MAX: float = 0.30
+TAB_COMPLETION_HABITUAL_MIN: float = 0.50
+
+# Sample-size floor below which Phase C primitives drop confidence to
+# 0.40 (sample-size honesty). Mirrors MIN_COMMANDS_FOR_FULL_CONFIDENCE
+# but is named separately so a future tune can move them independently.
+SHELL_MASTERY_MIN_COMMANDS: int = 5
+
+# Width of the "near a bucket boundary" band, expressed as a fraction of
+# the boundary value itself (not as percentage points): with 0.10, a 0.30
+# boundary has a band of roughly 0.27-0.33. Phase C primitives drop
+# confidence by 0.20 inside the band, per BEHAVE-EXTRACTOR.md
+# §"Threshold proximity".
+SHELL_MASTERY_BOUNDARY_BAND: float = 0.10
--- a/development/BEHAVE-EXTRACTOR.md
+++ b/development/BEHAVE-EXTRACTOR.md
@@ -646,7 +646,7 @@ unchecked = no v0 tag.**
 - [x] B.4 `motor.command_chunking`

 ### Phase C — `motor.shell_mastery.*`
-- [ ] C.1 `motor.shell_mastery.tab_completion`
+- [x] C.1 `motor.shell_mastery.tab_completion`
 - [ ] C.2 `motor.shell_mastery.shortcut_usage`
 - [ ] C.3 `motor.shell_mastery.pipe_chaining_depth`

--- a/tests/profiler/behave_shell/test_calibration_grid.py
+++ b/tests/profiler/behave_shell/test_calibration_grid.py
@@ -31,7 +31,7 @@ from decnet.profiler.behave_shell import extract_session
 from decnet.profiler.behave_shell._parse import parse_shard_line


-PHASE_AB_PRIMITIVES: frozenset[str] = frozenset({
+PHASE_ABC_PRIMITIVES: frozenset[str] = frozenset({
    # Phase A — calibration floor
    "motor.input_modality",
    "motor.paste_burst_rate",
@@ -44,6 +44,8 @@ PHASE_AB_PRIMITIVES: frozenset[str] = frozenset({
    "motor.motor_stability",
    "motor.error_correction",
    "motor.command_chunking",
+    # Phase C — motor.shell_mastery.* (lands one primitive per commit)
+    "motor.shell_mastery.tab_completion",
 })


@@ -111,7 +113,7 @@ def test_shard_emits_all_phase_a_primitives(
    obs = _all_observations(path)
    assert obs, f"{class_label}: extractor produced zero observations"
    seen = {o.primitive for o in obs}
-    missing = PHASE_AB_PRIMITIVES - seen
+    missing = PHASE_ABC_PRIMITIVES - seen
    assert not missing, (
        f"{class_label} ({shard_file}) missing primitives: "
        f"{sorted(missing)}"
@@ -148,7 +150,7 @@ def test_shards_are_discriminative_across_classes(
    # At least one primitive should produce different majority values
    # across the present classes.
    discriminative_primitives: list[str] = []
-    for prim in PHASE_AB_PRIMITIVES:
+    for prim in PHASE_ABC_PRIMITIVES:
        values = {by_class[c].get(prim) for c in by_class if prim in by_class[c]}
        if len(values) >= 2:
            discriminative_primitives.append(prim)
--- a/tests/profiler/behave_shell/test_motor_tab_completion.py
+++ b/tests/profiler/behave_shell/test_motor_tab_completion.py
@@ -0,0 +1,106 @@
+"""Step C.1: ``motor.shell_mastery.tab_completion``."""
+from __future__ import annotations
+
+from decnet.profiler.behave_shell import extract_session
+from decnet.profiler.behave_shell._ctx import build_session_context
+from decnet.profiler.behave_shell._parse import AsciinemaEvent
+
+PRIMITIVE = "motor.shell_mastery.tab_completion"
+
+
+def _of(observations: list, primitive: str):
+    """Return the single observation tagged ``primitive``.
+
+    Fails the calling test unless exactly one such observation exists.
+    """
+    hits = [item for item in observations if item.primitive == primitive]
+    assert len(hits) == 1, f"expected exactly one {primitive}, got {len(hits)}"
+    (only,) = hits
+    return only
+
+
+def _command(t0: float, body: str) -> list[AsciinemaEvent]:
+    """Synthesise one command: each character of ``body``, then a ``\\r``.
+
+    The first character is stamped ``t0`` and every following event,
+    including the terminating ``\\r``, lands 50ms after the previous one.
+    """
+    clock = t0
+    keystrokes: list[AsciinemaEvent] = []
+    for ch in body:
+        keystrokes.append((clock, "i", ch))
+        # Accumulate step by step so timestamps match a running clock.
+        clock += 0.05
+    return [*keystrokes, (clock, "i", "\r")]
+
+
+def _session(bodies: list[str], gap: float = 1.0) -> list[AsciinemaEvent]:
+    """Lay ``bodies`` out as consecutive commands on one timeline.
+
+    The first command starts at 0.0; each later one starts ``gap``
+    seconds after the final event of the command before it.
+    """
+    timeline: list[AsciinemaEvent] = []
+    next_start = 0.0
+    for text in bodies:
+        timeline += _command(next_start, text)
+        next_start = timeline[-1][0] + gap
+    return timeline
+
+
+def test_no_commands_no_emission() -> None:
+    """Input without a \\r/\\n forms no command, so nothing is emitted."""
+    unterminated = [(0.0, "i", "ls")]
+    emitted = list(extract_session(unterminated, sid="tab-empty"))
+    assert not any(o.primitive == PRIMITIVE for o in emitted)
+
+
+def test_zero_tabs_emit_none() -> None:
+    # Six tab-free commands: ratio 0 with enough samples for 0.75.
+    bodies = ["ls", "pwd", "id", "uname", "whoami", "date"]
+    emitted = list(extract_session(_session(bodies), sid="tab-none"))
+    found = _of(emitted, PRIMITIVE)
+    assert (found.value, found.confidence) == ("none", 0.75)
+
+
+def test_majority_tabs_emit_habitual() -> None:
+    # Only the final "exit" lacks a \t: 5 of 6 ≈ 0.83, well above 0.50.
+    tabbed = ["ls\t", "cd\t/tmp", "ec\thello", "cat\tf", "vi\t"]
+    events = _session([*tabbed, "exit"])
+    found = _of(list(extract_session(events, sid="tab-habitual")), PRIMITIVE)
+    assert found.value == "habitual"
+    assert found.confidence == 0.75
+
+
+def test_low_tab_rate_emits_occasional() -> None:
+    # 2 of 10 → ratio 0.20: non-zero, under 0.30, and off both fences.
+    with_tab = ["ls\t"] * 2
+    without_tab = ["pwd"] * 8
+    events = _session(with_tab + without_tab)
+    found = _of(list(extract_session(events, sid="tab-occasional")), PRIMITIVE)
+    assert found.value == "occasional"
+    assert found.confidence == 0.75
+
+
+def test_gap_band_rounds_down_to_occasional() -> None:
+    # 4 of 10 → ratio 0.40, sits in the registry's 30%-50% gap which
+    # we round DOWN to occasional.
+    bodies = ["ls\t"] * 4 + ["pwd"] * 6
+    out = list(extract_session(_session(bodies), sid="tab-gap"))
+    obs = _of(out, PRIMITIVE)
+    assert obs.value == "occasional"
+    # 0.40 is more than 10% (relative) away from both 0.30 and 0.50, and
+    # ten commands clear the sample-size floor, so confidence stays full.
+    assert obs.confidence == 0.75
+
+
+def test_near_boundary_drops_confidence() -> None:
+    # 3 of 10 → 0.30 — exactly the occasional boundary. Confidence drops.
+    bodies = ["ls\t"] * 3 + ["pwd"] * 7
+    out = list(extract_session(_session(bodies), sid="tab-boundary"))
+    obs = _of(out, PRIMITIVE)
+    # A ratio on the fence is not strictly below 0.30, so it lands in the
+    # gap span, which still rounds down to occasional.
+    assert obs.value == "occasional"
+    assert obs.confidence == 0.55
+
+
+def test_few_commands_drops_confidence() -> None:
+    # Four commands fall short of SHELL_MASTERY_MIN_COMMANDS=5, so the
+    # confidence is floored at 0.40 even though the value is clear-cut.
+    bodies = ["ls", "pwd", "id", "exit"]
+    emitted = list(extract_session(_session(bodies), sid="tab-low-n"))
+    found = _of(emitted, PRIMITIVE)
+    assert (found.value, found.confidence) == ("none", 0.40)
+
+
+def test_segmentation_populates_tab_count() -> None:
+    """End-to-end: each tab byte bumps ``Command.tab_count`` for its own
+    command only; the count starts again at zero for the next command."""
+    two_tabs = _command(0.0, "l\ts\t")
+    no_tabs = _command(5.0, "pwd")
+    ctx = build_session_context(two_tabs + no_tabs, sid="seg-tab", source="t")
+    assert len(ctx.commands) == 2
+    first, second = ctx.commands
+    assert first.tab_count == 2
+    assert second.tab_count == 0