feat(profiler/behave_shell): emit motor.shell_mastery.tab_completion
This commit is contained in:
@@ -23,6 +23,7 @@ from decnet.profiler.behave_shell._thresholds import (
|
||||
IKI_THINK_MAX_S,
|
||||
PASTE_BURST_MAX_IAT_S,
|
||||
PASTE_MIN_CHARS_PER_EVENT,
|
||||
SHORTCUT_CTRL_BYTES,
|
||||
)
|
||||
|
||||
|
||||
@@ -164,14 +165,26 @@ def _split_typing_bursts(iats: tuple[float, ...]) -> tuple[tuple[float, ...], ..
|
||||
def _segment_commands(inputs: list[AsciinemaEvent]) -> tuple[Command, ...]:
|
||||
"""Walk input events, splitting on ``\\r`` / ``\\n`` into commands.
|
||||
|
||||
PII discipline: only the first whitespace-delimited token is
|
||||
retained, and only as a sha256 hash. Buffer contents are dropped
|
||||
on every command boundary; an unterminated trailing buffer (no
|
||||
final newline) yields no command.
|
||||
Retains only the first whitespace-delimited token as a sha256 hash
|
||||
plus three integer counters needed for the Phase C
|
||||
``motor.shell_mastery.*`` primitives:
|
||||
|
||||
* ``tab_count`` — ``\\t`` (0x09) keystrokes in the command
|
||||
* ``shortcut_count`` — readline control bytes from
|
||||
:data:`SHORTCUT_CTRL_BYTES`
|
||||
* ``pipe_count`` — ``|`` characters in the command (counted on
|
||||
every byte; pasted pipelines still indicate pipeline fluency the
|
||||
operator chose to execute)
|
||||
|
||||
Buffer contents are dropped on every command boundary; an
|
||||
unterminated trailing buffer (no final newline) yields no command.
|
||||
"""
|
||||
cmds: list[Command] = []
|
||||
buf_chars: list[str] = []
|
||||
buf_start_ts: float | None = None
|
||||
tab_count = 0
|
||||
shortcut_count = 0
|
||||
pipe_count = 0
|
||||
|
||||
for t, _kind, data in inputs:
|
||||
for c in data:
|
||||
@@ -183,13 +196,25 @@ def _segment_commands(inputs: list[AsciinemaEvent]) -> tuple[Command, ...]:
|
||||
start_ts=buf_start_ts if buf_start_ts is not None else t,
|
||||
end_ts=t,
|
||||
first_token_hash=hash_token(first_token),
|
||||
tab_count=tab_count,
|
||||
shortcut_count=shortcut_count,
|
||||
pipe_count=pipe_count,
|
||||
))
|
||||
buf_chars = []
|
||||
buf_start_ts = None
|
||||
tab_count = 0
|
||||
shortcut_count = 0
|
||||
pipe_count = 0
|
||||
else:
|
||||
if not buf_chars:
|
||||
buf_start_ts = t
|
||||
buf_chars.append(c)
|
||||
if c == "\t":
|
||||
tab_count += 1
|
||||
elif c == "|":
|
||||
pipe_count += 1
|
||||
elif c in SHORTCUT_CTRL_BYTES:
|
||||
shortcut_count += 1
|
||||
|
||||
return tuple(cmds)
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@ from decnet.profiler.behave_shell._features.motor import (
|
||||
keystroke_cadence,
|
||||
motor_stability,
|
||||
paste_burst_rate,
|
||||
tab_completion,
|
||||
)
|
||||
|
||||
FeatureFn = Callable[[SessionContext], Iterable[Observation]]
|
||||
@@ -35,6 +36,7 @@ FEATURES: tuple[FeatureFn, ...] = (
|
||||
motor_stability,
|
||||
error_correction,
|
||||
command_chunking,
|
||||
tab_completion,
|
||||
inter_command_latency_class,
|
||||
command_branch_diversity,
|
||||
feedback_loop_engagement,
|
||||
|
||||
@@ -26,11 +26,25 @@ from decnet.profiler.behave_shell._thresholds import (
|
||||
MODALITY_TYPED_MAX,
|
||||
PASTE_RATE_HABITUAL_MIN,
|
||||
PASTE_RATE_OCCASIONAL_MIN,
|
||||
SHELL_MASTERY_BOUNDARY_BAND,
|
||||
SHELL_MASTERY_MIN_COMMANDS,
|
||||
TAB_COMPLETION_HABITUAL_MIN,
|
||||
TAB_COMPLETION_OCCASIONAL_MAX,
|
||||
TREMOR_FAST_FLOOR_S,
|
||||
TREMOR_RATE_MIN,
|
||||
)
|
||||
|
||||
|
||||
def _near(value: float, boundary: float) -> bool:
    """Return True iff ``value`` lies within ``SHELL_MASTERY_BOUNDARY_BAND``
    of ``boundary``, measured relative to the boundary's magnitude.

    Phase C uses this to drop confidence when a measurement sits on a
    bucket fence.

    A zero boundary has no meaningful relative distance, so in that case
    the band is applied to ``value`` as an absolute tolerance instead.
    """
    if boundary == 0:
        return abs(value) <= SHELL_MASTERY_BOUNDARY_BAND
    # Divide by the magnitude: dividing by a negative boundary would make
    # the quotient non-positive and the comparison vacuously true.
    return abs(value - boundary) / abs(boundary) <= SHELL_MASTERY_BOUNDARY_BAND
|
||||
|
||||
|
||||
def input_modality(ctx: SessionContext) -> Iterator[Observation]:
|
||||
"""Emit ``motor.input_modality`` ∈ {typed, pasted, mixed}.
|
||||
|
||||
@@ -252,3 +266,53 @@ def command_chunking(ctx: SessionContext) -> Iterator[Observation]:
|
||||
value=value,
|
||||
confidence=confidence,
|
||||
)
|
||||
|
||||
|
||||
def tab_completion(ctx: SessionContext) -> Iterator[Observation]:
    """Yield one ``motor.shell_mastery.tab_completion`` observation.

    The value is one of ``none``, ``occasional`` or ``habitual`` and is
    derived from the share of commands in ``ctx.commands`` whose
    ``tab_count`` is greater than zero.

    Bucketing:
      * share of exactly zero -> ``none``
      * share below ``TAB_COMPLETION_HABITUAL_MIN`` -> ``occasional``
        (this includes the registry's own gap between
        ``TAB_COMPLETION_OCCASIONAL_MAX`` and
        ``TAB_COMPLETION_HABITUAL_MIN``, which rounds down, not up)
      * otherwise -> ``habitual``

    Confidence:
      * fewer than ``SHELL_MASTERY_MIN_COMMANDS`` commands -> 0.40
        (sample-size honesty)
      * share close to either threshold, as judged by ``_near`` -> 0.55
        (threshold proximity)
      * otherwise -> 0.75

    Nothing is yielded for a session without commands: there is no
    honest ratio to report and the registry has no ``unknown`` value
    for this primitive.
    """
    commands = ctx.commands
    total = len(commands)
    if not total:
        return

    tabbed = len([cmd for cmd in commands if cmd.tab_count > 0])
    share = tabbed / total

    if tabbed == 0:
        bucket = "none"
    elif (
        share < TAB_COMPLETION_OCCASIONAL_MAX
        or share < TAB_COMPLETION_HABITUAL_MIN
    ):
        # Covers both the plain "occasional" range and the registry's
        # own gap (30%-<50%), which rounds down rather than up.
        bucket = "occasional"
    else:
        bucket = "habitual"

    if total < SHELL_MASTERY_MIN_COMMANDS:
        confidence = 0.40
    else:
        fences = (TAB_COMPLETION_OCCASIONAL_MAX, TAB_COMPLETION_HABITUAL_MIN)
        on_fence = any(_near(share, fence) for fence in fences)
        confidence = 0.55 if on_fence else 0.75

    yield make_observation(
        ctx,
        primitive="motor.shell_mastery.tab_completion",
        value=bucket,
        confidence=confidence,
    )
|
||||
|
||||
@@ -47,11 +47,20 @@ class Command:
|
||||
``end_ts`` is the timestamp of the ``\\r`` / ``\\n`` that
|
||||
terminated the command; ``start_ts`` is the first character typed
|
||||
or pasted into it.
|
||||
|
||||
``tab_count`` / ``shortcut_count`` / ``pipe_count`` are integer
|
||||
counters populated by the context builder during the per-command
|
||||
byte sweep. They feed the ``motor.shell_mastery.*`` primitives
|
||||
(Phase C). The raw bytes themselves are read once during the
|
||||
sweep and discarded — only the counters are retained.
|
||||
"""
|
||||
|
||||
start_ts: float
|
||||
end_ts: float
|
||||
first_token_hash: str
|
||||
tab_count: int = 0
|
||||
shortcut_count: int = 0
|
||||
pipe_count: int = 0
|
||||
|
||||
|
||||
def hash_token(token: str) -> str:
|
||||
|
||||
@@ -109,3 +109,33 @@ BACKSPACE_IMMEDIATE_MAX_S: float = 0.50
|
||||
# Median CV of within-command IATs. Below this → fluent (steady within
|
||||
# each command); above → fragmented (operator pauses mid-command).
|
||||
CMD_CHUNKING_FLUENT_CV_MAX: float = 0.50
|
||||
|
||||
# ── motor.shell_mastery.* (Phase C) ─────────────────────────────────────────
|
||||
# Readline control bytes counted toward ``shortcut_usage``. The seven
|
||||
# pinned by BEHAVE-EXTRACTOR.md §Phase C (line 472):
|
||||
# ^A start-of-line ^E end-of-line ^W kill-prev-word
|
||||
# ^U kill-line ^R reverse-i-search ^B back-char ^F forward-char
|
||||
# v0.2 may extend to ^K/^Y/^L/^D/^P/^N once corpus calibration justifies it.
|
||||
# Note: ^U / ^W also feed ``motor.error_correction`` (Step B.3) via the
|
||||
# ``kill_line_count`` channel — these are independent measurements over
|
||||
# the same byte stream, not double-counting.
|
||||
SHORTCUT_CTRL_BYTES: frozenset[str] = frozenset({
|
||||
"\x01", "\x05", "\x17", "\x15", "\x12", "\x02", "\x06",
|
||||
})
|
||||
|
||||
# motor.shell_mastery.tab_completion — fraction of commands containing
|
||||
# at least one ``\t`` keystroke. Registry buckets per BEHAVE-EXTRACTOR.md
|
||||
# line 471: ``none`` (0%), ``occasional`` (<30%), ``habitual`` (≥50%).
|
||||
# The 30%-50% gap rounds down to ``occasional`` — the registry's own gap.
|
||||
TAB_COMPLETION_OCCASIONAL_MAX: float = 0.30
|
||||
TAB_COMPLETION_HABITUAL_MIN: float = 0.50
|
||||
|
||||
# Sample-size floor below which Phase C primitives drop confidence to
|
||||
# 0.40 (sample-size honesty). Mirrors MIN_COMMANDS_FOR_FULL_CONFIDENCE
|
||||
# but is named separately so a future tune can move them independently.
|
||||
SHELL_MASTERY_MIN_COMMANDS: int = 5
|
||||
|
||||
# Width of the "near a bucket boundary" band (relative to the boundary)
|
||||
# used by Phase C primitives. ±10% of the boundary value drops
|
||||
# confidence by 0.20 per BEHAVE-EXTRACTOR.md §"Threshold proximity".
|
||||
SHELL_MASTERY_BOUNDARY_BAND: float = 0.10
|
||||
|
||||
Reference in New Issue
Block a user