feat(profiler/behave_shell): emit cognitive.tool_vocabulary
Absolute distinct first_token_hash count, bucketed against TOOL_VOCAB_NARROW_MAX / TOOL_VOCAB_BROAD_MIN. v0.1; D.8 re-tunes.
This commit is contained in:
@@ -17,6 +17,7 @@ from decnet.profiler.behave_shell._features.cognitive import (
|
||||
exploration_style,
|
||||
feedback_loop_engagement,
|
||||
planning_depth,
|
||||
tool_vocabulary,
|
||||
inter_command_consistency,
|
||||
inter_command_latency_class,
|
||||
)
|
||||
@@ -51,4 +52,5 @@ FEATURES: tuple[FeatureFn, ...] = (
|
||||
cognitive_load,
|
||||
exploration_style,
|
||||
planning_depth,
|
||||
tool_vocabulary,
|
||||
)
|
||||
|
||||
@@ -36,6 +36,8 @@ from decnet.profiler.behave_shell._thresholds import (
|
||||
PAUSE_CV_METRONOMIC_MAX,
|
||||
PLANNING_DEEP_MIN,
|
||||
PLANNING_REACTIVE_MIN,
|
||||
TOOL_VOCAB_BROAD_MIN,
|
||||
TOOL_VOCAB_NARROW_MAX,
|
||||
)
|
||||
|
||||
|
||||
@@ -184,6 +186,35 @@ def feedback_loop_engagement(ctx: SessionContext) -> Iterator[Observation]:
|
||||
)
|
||||
|
||||
|
||||
def tool_vocabulary(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.tool_vocabulary`` ∈ {narrow, moderate, broad}.

    The value is bucketed from the absolute count of distinct
    ``first_token_hash`` values across ``ctx.commands``:

    * ``distinct <= TOOL_VOCAB_NARROW_MAX`` → ``narrow``
    * ``distinct >= TOOL_VOCAB_BROAD_MIN`` → ``broad``
    * anything in between → ``moderate``

    Nothing is emitted when the session has no commands. Sessions with
    fewer than ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` commands still emit,
    but at confidence 0.40 instead of 0.70 — a session with few commands
    but five distinct tools is genuinely a moderate-vocabulary signal.
    """
    if not ctx.commands:
        return

    # Absolute (not normalised) vocabulary size for the whole session.
    distinct = len({c.first_token_hash for c in ctx.commands})

    if distinct <= TOOL_VOCAB_NARROW_MAX:
        value = "narrow"
    elif distinct >= TOOL_VOCAB_BROAD_MIN:
        value = "broad"
    else:
        value = "moderate"

    # Sample-size honesty: short sessions keep their bucket but are
    # reported with reduced confidence.
    if len(ctx.commands) < MIN_COMMANDS_FOR_FULL_CONFIDENCE:
        confidence = 0.40
    else:
        confidence = 0.70

    yield make_observation(
        ctx,
        primitive="cognitive.tool_vocabulary",
        value=value,
        confidence=confidence,
    )
|
||||
|
||||
|
||||
def planning_depth(ctx: SessionContext) -> Iterator[Observation]:
|
||||
"""Emit ``cognitive.planning_depth`` ∈ {deep, shallow, reactive}.
|
||||
|
||||
|
||||
@@ -141,6 +141,20 @@ EXPLORATION_CHAOTIC_BACKTRACK_MIN: float = 0.30
|
||||
PLANNING_DEEP_MIN: float = 0.40
|
||||
PLANNING_REACTIVE_MIN: float = 0.50
|
||||
|
||||
# ── cognitive.tool_vocabulary (Step D.4) ───────────────────────────────────
# Absolute count of distinct first_token_hashes across the session.
#
#   distinct <= TOOL_VOCAB_NARROW_MAX  → narrow
#   distinct >= TOOL_VOCAB_BROAD_MIN   → broad
#   otherwise                          → moderate
#
# Absolute, not normalised. A 3-command session with 3 unique tools is
# ``narrow`` not ``broad`` — the operator simply hasn't shown range yet.
# Sample-size honesty: confidence is reduced when the session has fewer
# than MIN_COMMANDS_FOR_FULL_CONFIDENCE commands.
# v0.1; D.8 re-tunes.
TOOL_VOCAB_NARROW_MAX: int = 3
TOOL_VOCAB_BROAD_MIN: int = 10
|
||||
|
||||
# ── motor.keystroke_cadence (Step B.1) ──────────────────────────────────────
|
||||
# Typing bursts split at gaps > IKI_THINK_MAX_S so think-pauses between
|
||||
# commands don't inflate the within-burst CV. Mirrors the prototype's
|
||||
|
||||
@@ -0,0 +1,61 @@
|
||||
"""Step D.4: ``cognitive.tool_vocabulary``."""
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.profiler.behave_shell import extract_session
|
||||
from decnet.profiler.behave_shell._parse import AsciinemaEvent
|
||||
|
||||
|
||||
def _of(observations: list, primitive: str):
    """Return the single observation in *observations* named *primitive*.

    Raises ``AssertionError`` (failing the calling test) unless exactly
    one observation has ``.primitive == primitive``.
    """
    obs = [o for o in observations if o.primitive == primitive]
    assert len(obs) == 1, f"expected exactly one {primitive}, got {len(obs)}"
    return obs[0]
|
||||
|
||||
|
||||
def _cmds(tokens: list[str]) -> list[AsciinemaEvent]:
    """Build a synthetic event stream that types each token as one command.

    Token number ``i`` starts at ``i * 1.0`` seconds. Each of its
    characters becomes an ``(timestamp, "i", char)`` event spaced 0.05 s
    apart, and the token is terminated by a ``"\\r"`` event.
    """
    events: list[AsciinemaEvent] = []
    for i, tok in enumerate(tokens):
        t0 = i * 1.0
        for j, c in enumerate(tok):
            events.append((t0 + j * 0.05, "i", c))
        # Carriage return one keystroke interval after the last character.
        events.append((t0 + len(tok) * 0.05, "i", "\r"))
    return events
|
||||
|
||||
|
||||
def test_no_commands_no_emission() -> None:
    """A lone keystroke with no carriage return emits no tool_vocabulary."""
    out = list(extract_session([(0.0, "i", "x")], sid="tv-empty"))
    assert [o for o in out if o.primitive == "cognitive.tool_vocabulary"] == []
|
||||
|
||||
|
||||
def test_few_distinct_tools_emit_narrow() -> None:
    """Eight commands drawn from two distinct tools bucket as ``narrow``."""
    out = list(extract_session(
        _cmds(["ls", "ls", "ps", "ps", "ls", "ps", "ls", "ps"]),
        sid="tv-narrow",
    ))
    obs = _of(out, "cognitive.tool_vocabulary")
    assert obs.value == "narrow"
|
||||
|
||||
|
||||
def test_mid_distinct_emit_moderate() -> None:
    """Six distinct tools fall between the thresholds and emit ``moderate``."""
    out = list(extract_session(
        _cmds(["ls", "ps", "id", "uname", "whoami", "pwd"]),
        sid="tv-mod",
    ))
    obs = _of(out, "cognitive.tool_vocabulary")
    assert obs.value == "moderate"
|
||||
|
||||
|
||||
def test_many_distinct_tools_emit_broad() -> None:
    """Eleven distinct tools bucket as ``broad``."""
    out = list(extract_session(
        _cmds(["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"]),
        sid="tv-broad",
    ))
    obs = _of(out, "cognitive.tool_vocabulary")
    assert obs.value == "broad"
|
||||
|
||||
|
||||
def test_low_sample_count_reduces_confidence() -> None:
    """A two-command session reports lower confidence than a six-command one."""
    # NOTE(review): assumes MIN_COMMANDS_FOR_FULL_CONFIDENCE is greater than
    # 2 and at most 6; the constant is not visible here — confirm.
    short = list(extract_session(_cmds(["a", "b"]), sid="tv-short"))
    full = list(extract_session(_cmds(["a", "b", "c", "d", "e", "f"]), sid="tv-full"))
    s = _of(short, "cognitive.tool_vocabulary")
    f = _of(full, "cognitive.tool_vocabulary")
    assert s.confidence < f.confidence
|
||||
Reference in New Issue
Block a user