From f286c84d95a6456a0032b2e4dd783c836fd48cae Mon Sep 17 00:00:00 2001 From: anti Date: Sun, 3 May 2026 23:56:22 -0400 Subject: [PATCH] feat(profiler/behave_shell): emit cognitive.tool_vocabulary Absolute distinct first_token_hash count, bucketed against TOOL_VOCAB_NARROW_MAX / TOOL_VOCAB_BROAD_MIN. v0.1; D.8 re-tunes. --- .../behave_shell/_features/__init__.py | 2 + .../behave_shell/_features/cognitive.py | 31 ++++++++++ decnet/profiler/behave_shell/_thresholds.py | 14 +++++ .../test_cognitive_tool_vocabulary.py | 61 +++++++++++++++++++ 4 files changed, 108 insertions(+) create mode 100644 tests/profiler/behave_shell/test_cognitive_tool_vocabulary.py diff --git a/decnet/profiler/behave_shell/_features/__init__.py b/decnet/profiler/behave_shell/_features/__init__.py index 3e7fda4c..b3d357bb 100644 --- a/decnet/profiler/behave_shell/_features/__init__.py +++ b/decnet/profiler/behave_shell/_features/__init__.py @@ -17,6 +17,7 @@ from decnet.profiler.behave_shell._features.cognitive import ( exploration_style, feedback_loop_engagement, planning_depth, + tool_vocabulary, inter_command_consistency, inter_command_latency_class, ) @@ -51,4 +52,5 @@ FEATURES: tuple[FeatureFn, ...] = ( cognitive_load, exploration_style, planning_depth, + tool_vocabulary, ) diff --git a/decnet/profiler/behave_shell/_features/cognitive.py b/decnet/profiler/behave_shell/_features/cognitive.py index 61081887..cac925dc 100644 --- a/decnet/profiler/behave_shell/_features/cognitive.py +++ b/decnet/profiler/behave_shell/_features/cognitive.py @@ -36,6 +36,8 @@ from decnet.profiler.behave_shell._thresholds import ( PAUSE_CV_METRONOMIC_MAX, PLANNING_DEEP_MIN, PLANNING_REACTIVE_MIN, + TOOL_VOCAB_BROAD_MIN, + TOOL_VOCAB_NARROW_MAX, ) @@ -184,6 +186,35 @@ def feedback_loop_engagement(ctx: SessionContext) -> Iterator[Observation]: ) +def tool_vocabulary(ctx: SessionContext) -> Iterator[Observation]: + """Emit ``cognitive.tool_vocabulary`` ∈ {narrow, moderate, broad}. 
+ + Absolute count of distinct first_token_hashes. Skip emission when + no commands exist; below the sample-size floor we still emit, but + at confidence 0.40 — a session with few commands but five distinct + tools is genuinely a moderate-vocabulary signal. + """ + if not ctx.commands: + return + distinct = len({c.first_token_hash for c in ctx.commands}) + if distinct <= TOOL_VOCAB_NARROW_MAX: + value = "narrow" + elif distinct >= TOOL_VOCAB_BROAD_MIN: + value = "broad" + else: + value = "moderate" + if len(ctx.commands) < MIN_COMMANDS_FOR_FULL_CONFIDENCE: + confidence = 0.40 + else: + confidence = 0.70 + yield make_observation( + ctx, + primitive="cognitive.tool_vocabulary", + value=value, + confidence=confidence, + ) + + def planning_depth(ctx: SessionContext) -> Iterator[Observation]: """Emit ``cognitive.planning_depth`` ∈ {deep, shallow, reactive}. diff --git a/decnet/profiler/behave_shell/_thresholds.py b/decnet/profiler/behave_shell/_thresholds.py index 31c537c2..88a945e1 100644 --- a/decnet/profiler/behave_shell/_thresholds.py +++ b/decnet/profiler/behave_shell/_thresholds.py @@ -141,6 +141,20 @@ EXPLORATION_CHAOTIC_BACKTRACK_MIN: float = 0.30 PLANNING_DEEP_MIN: float = 0.40 PLANNING_REACTIVE_MIN: float = 0.50 +# ── cognitive.tool_vocabulary (Step D.4) ─────────────────────────────────── +# Absolute count of distinct first_token_hashes across the session. +# +# distinct <= TOOL_VOCAB_NARROW_MAX → narrow +# distinct >= TOOL_VOCAB_BROAD_MIN → broad +# otherwise → moderate +# +# Absolute, not normalised. A 3-command session with 3 unique tools is +# ``narrow`` not ``broad`` — the operator simply hasn't shown range yet. +# Fewer than MIN_COMMANDS_FOR_FULL_CONFIDENCE commands lowers confidence. +# v0.1; D.8 re-tunes. 
+TOOL_VOCAB_NARROW_MAX: int = 3 +TOOL_VOCAB_BROAD_MIN: int = 10 + # ── motor.keystroke_cadence (Step B.1) ────────────────────────────────────── # Typing bursts split at gaps > IKI_THINK_MAX_S so think-pauses between # commands don't inflate the within-burst CV. Mirrors the prototype's diff --git a/tests/profiler/behave_shell/test_cognitive_tool_vocabulary.py b/tests/profiler/behave_shell/test_cognitive_tool_vocabulary.py new file mode 100644 index 00000000..a029b6d4 --- /dev/null +++ b/tests/profiler/behave_shell/test_cognitive_tool_vocabulary.py @@ -0,0 +1,61 @@ +"""Step D.4: ``cognitive.tool_vocabulary``.""" +from __future__ import annotations + +from decnet.profiler.behave_shell import extract_session +from decnet.profiler.behave_shell._parse import AsciinemaEvent + + +def _of(observations: list, primitive: str): + obs = [o for o in observations if o.primitive == primitive] + assert len(obs) == 1, f"expected exactly one {primitive}, got {len(obs)}" + return obs[0] + + +def _cmds(tokens: list[str]) -> list[AsciinemaEvent]: + events: list[AsciinemaEvent] = [] + for i, tok in enumerate(tokens): + t0 = i * 1.0 + for j, c in enumerate(tok): + events.append((t0 + j * 0.05, "i", c)) + events.append((t0 + len(tok) * 0.05, "i", "\r")) + return events + + +def test_no_commands_no_emission() -> None: + out = list(extract_session([(0.0, "i", "x")], sid="tv-empty")) + assert [o for o in out if o.primitive == "cognitive.tool_vocabulary"] == [] + + +def test_few_distinct_tools_emit_narrow() -> None: + out = list(extract_session( + _cmds(["ls", "ls", "ps", "ps", "ls", "ps", "ls", "ps"]), + sid="tv-narrow", + )) + obs = _of(out, "cognitive.tool_vocabulary") + assert obs.value == "narrow" + + +def test_mid_distinct_emit_moderate() -> None: + out = list(extract_session( + _cmds(["ls", "ps", "id", "uname", "whoami", "pwd"]), + sid="tv-mod", + )) + obs = _of(out, "cognitive.tool_vocabulary") + assert obs.value == "moderate" + + +def test_many_distinct_tools_emit_broad() -> None: + 
out = list(extract_session( + _cmds(["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"]), + sid="tv-broad", + )) + obs = _of(out, "cognitive.tool_vocabulary") + assert obs.value == "broad" + + +def test_low_sample_count_reduces_confidence() -> None: + short = list(extract_session(_cmds(["a", "b"]), sid="tv-short")) + full = list(extract_session(_cmds(["a", "b", "c", "d", "e", "f"]), sid="tv-full")) + s = _of(short, "cognitive.tool_vocabulary") + f = _of(full, "cognitive.tool_vocabulary") + assert s.confidence < f.confidence