feat(profiler/behave_shell): emit cognitive.tool_vocabulary

Absolute distinct first_token_hash count, bucketed against
TOOL_VOCAB_NARROW_MAX / TOOL_VOCAB_BROAD_MIN. v0.1; D.8 re-tunes.
This commit is contained in:
2026-05-03 23:56:22 -04:00
parent 6c2e4ada83
commit f286c84d95
4 changed files with 108 additions and 0 deletions

View File

@@ -17,6 +17,7 @@ from decnet.profiler.behave_shell._features.cognitive import (
exploration_style,
feedback_loop_engagement,
planning_depth,
tool_vocabulary,
inter_command_consistency,
inter_command_latency_class,
)
@@ -51,4 +52,5 @@ FEATURES: tuple[FeatureFn, ...] = (
cognitive_load,
exploration_style,
planning_depth,
tool_vocabulary,
)

View File

@@ -36,6 +36,8 @@ from decnet.profiler.behave_shell._thresholds import (
PAUSE_CV_METRONOMIC_MAX,
PLANNING_DEEP_MIN,
PLANNING_REACTIVE_MIN,
TOOL_VOCAB_BROAD_MIN,
TOOL_VOCAB_NARROW_MAX,
)
@@ -184,6 +186,35 @@ def feedback_loop_engagement(ctx: SessionContext) -> Iterator[Observation]:
)
def tool_vocabulary(ctx: SessionContext) -> Iterator[Observation]:
    """Yield a single ``cognitive.tool_vocabulary`` observation.

    The value is one of ``narrow``, ``moderate`` or ``broad``, chosen from
    the absolute number of distinct ``first_token_hash`` values seen across
    ``ctx.commands`` (the count is not normalised by session length).

    Nothing is yielded for a session without commands. A session with fewer
    than ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` commands is still reported, but
    at confidence 0.40 instead of 0.70: a handful of commands spread over
    several distinct tools is a real signal, only a weaker one.
    """
    commands = ctx.commands
    if not commands:
        return

    unique_tools = len({cmd.first_token_hash for cmd in commands})

    # The narrow test is evaluated first; both bounds are inclusive.
    label = (
        "narrow"
        if unique_tools <= TOOL_VOCAB_NARROW_MAX
        else "broad"
        if unique_tools >= TOOL_VOCAB_BROAD_MIN
        else "moderate"
    )

    # Short sessions keep their bucket but are reported with less certainty.
    enough_samples = len(commands) >= MIN_COMMANDS_FOR_FULL_CONFIDENCE
    certainty = 0.70 if enough_samples else 0.40

    yield make_observation(
        ctx,
        primitive="cognitive.tool_vocabulary",
        value=label,
        confidence=certainty,
    )
def planning_depth(ctx: SessionContext) -> Iterator[Observation]:
"""Emit ``cognitive.planning_depth`` ∈ {deep, shallow, reactive}.

View File

@@ -141,6 +141,20 @@ EXPLORATION_CHAOTIC_BACKTRACK_MIN: float = 0.30
PLANNING_DEEP_MIN: float = 0.40
PLANNING_REACTIVE_MIN: float = 0.50
# ── cognitive.tool_vocabulary (Step D.4) ───────────────────────────────────
# Absolute count of distinct first_token_hashes across the session.
#
# distinct <= TOOL_VOCAB_NARROW_MAX → narrow
# distinct >= TOOL_VOCAB_BROAD_MIN → broad
# otherwise → moderate
#
# Absolute, not normalised. A 3-command session with 3 unique tools is
# ``narrow`` not ``broad`` — the operator simply hasn't shown range yet.
# Sample-size honesty: sessions with fewer than MIN_COMMANDS_FOR_FULL_CONFIDENCE
# commands are still emitted, but at reduced confidence.
# v0.1; D.8 re-tunes.
TOOL_VOCAB_NARROW_MAX: int = 3  # inclusive upper bound: distinct <= 3 is "narrow"
TOOL_VOCAB_BROAD_MIN: int = 10  # inclusive lower bound: distinct >= 10 is "broad"
# ── motor.keystroke_cadence (Step B.1) ──────────────────────────────────────
# Typing bursts split at gaps > IKI_THINK_MAX_S so think-pauses between
# commands don't inflate the within-burst CV. Mirrors the prototype's