From f286c84d95a6456a0032b2e4dd783c836fd48cae Mon Sep 17 00:00:00 2001
From: anti <samuel@securejump.cl>
Date: Sun, 3 May 2026 23:56:22 -0400
Subject: [PATCH] feat(profiler/behave_shell): emit cognitive.tool_vocabulary

Absolute distinct first_token_hash count, bucketed against
TOOL_VOCAB_NARROW_MAX / TOOL_VOCAB_BROAD_MIN. v0.1; D.8 re-tunes.
---
 .../behave_shell/_features/__init__.py        |  2 +
 .../behave_shell/_features/cognitive.py       | 31 ++++++++++
 decnet/profiler/behave_shell/_thresholds.py   | 14 +++++
 .../test_cognitive_tool_vocabulary.py         | 61 +++++++++++++++++++
 4 files changed, 108 insertions(+)
 create mode 100644 tests/profiler/behave_shell/test_cognitive_tool_vocabulary.py

diff --git a/decnet/profiler/behave_shell/_features/__init__.py b/decnet/profiler/behave_shell/_features/__init__.py
index 3e7fda4c..b3d357bb 100644
--- a/decnet/profiler/behave_shell/_features/__init__.py
+++ b/decnet/profiler/behave_shell/_features/__init__.py
@@ -17,6 +17,7 @@ from decnet.profiler.behave_shell._features.cognitive import (
     exploration_style,
     feedback_loop_engagement,
     planning_depth,
+    tool_vocabulary,
     inter_command_consistency,
     inter_command_latency_class,
 )
@@ -51,4 +52,5 @@ FEATURES: tuple[FeatureFn, ...] = (
     cognitive_load,
     exploration_style,
     planning_depth,
+    tool_vocabulary,
 )
diff --git a/decnet/profiler/behave_shell/_features/cognitive.py b/decnet/profiler/behave_shell/_features/cognitive.py
index 61081887..cac925dc 100644
--- a/decnet/profiler/behave_shell/_features/cognitive.py
+++ b/decnet/profiler/behave_shell/_features/cognitive.py
@@ -36,6 +36,8 @@ from decnet.profiler.behave_shell._thresholds import (
     PAUSE_CV_METRONOMIC_MAX,
     PLANNING_DEEP_MIN,
     PLANNING_REACTIVE_MIN,
+    TOOL_VOCAB_BROAD_MIN,
+    TOOL_VOCAB_NARROW_MAX,
 )
 
 
@@ -184,6 +186,35 @@ def feedback_loop_engagement(ctx: SessionContext) -> Iterator[Observation]:
     )
 
 
+def tool_vocabulary(ctx: SessionContext) -> Iterator[Observation]:
+    """Emit ``cognitive.tool_vocabulary`` ∈ {narrow, moderate, broad}.
+
+    Buckets the absolute count of distinct first_token_hashes. Nothing is
+    emitted when no commands exist. Confidence is 0.70, dropping to 0.40
+    under MIN_COMMANDS_FOR_FULL_CONFIDENCE commands; we still emit then, as
+    few commands with five distinct tools is genuinely a moderate signal.
+    """
+    if not ctx.commands:
+        return  # bare return: the generator yields nothing
+    distinct = len({c.first_token_hash for c in ctx.commands})  # set drops repeats
+    if distinct <= TOOL_VOCAB_NARROW_MAX:  # inclusive upper bound
+        value = "narrow"
+    elif distinct >= TOOL_VOCAB_BROAD_MIN:  # inclusive lower bound
+        value = "broad"
+    else:
+        value = "moderate"
+    if len(ctx.commands) < MIN_COMMANDS_FOR_FULL_CONFIDENCE:  # total, not distinct
+        confidence = 0.40
+    else:
+        confidence = 0.70
+    yield make_observation(
+        ctx,
+        primitive="cognitive.tool_vocabulary",
+        value=value,
+        confidence=confidence,
+    )
+
+
 def planning_depth(ctx: SessionContext) -> Iterator[Observation]:
     """Emit ``cognitive.planning_depth`` ∈ {deep, shallow, reactive}.
 
diff --git a/decnet/profiler/behave_shell/_thresholds.py b/decnet/profiler/behave_shell/_thresholds.py
index 31c537c2..88a945e1 100644
--- a/decnet/profiler/behave_shell/_thresholds.py
+++ b/decnet/profiler/behave_shell/_thresholds.py
@@ -141,6 +141,20 @@ EXPLORATION_CHAOTIC_BACKTRACK_MIN: float = 0.30
 PLANNING_DEEP_MIN: float = 0.40
 PLANNING_REACTIVE_MIN: float = 0.50
 
+# ── cognitive.tool_vocabulary (Step D.4) ───────────────────────────────────
+# Absolute count of distinct first_token_hashes across the session.
+#
+#   distinct <= TOOL_VOCAB_NARROW_MAX  → narrow
+#   distinct >= TOOL_VOCAB_BROAD_MIN   → broad
+#   otherwise                          → moderate
+#
+# Absolute, not normalised. A 3-command session with 3 unique tools is
+# ``narrow`` not ``broad`` — the operator simply hasn't shown range yet.
+# Sample-size honesty: sessions shorter than MIN_COMMANDS_FOR_FULL_CONFIDENCE
+# commands emit at reduced confidence. v0.1; D.8 re-tunes.
+TOOL_VOCAB_NARROW_MAX: int = 3  # inclusive upper bound for "narrow"
+TOOL_VOCAB_BROAD_MIN: int = 10  # inclusive lower bound for "broad"
+
 # ── motor.keystroke_cadence (Step B.1) ──────────────────────────────────────
 # Typing bursts split at gaps > IKI_THINK_MAX_S so think-pauses between
 # commands don't inflate the within-burst CV. Mirrors the prototype's
diff --git a/tests/profiler/behave_shell/test_cognitive_tool_vocabulary.py b/tests/profiler/behave_shell/test_cognitive_tool_vocabulary.py
new file mode 100644
index 00000000..a029b6d4
--- /dev/null
+++ b/tests/profiler/behave_shell/test_cognitive_tool_vocabulary.py
@@ -0,0 +1,61 @@
+"""Step D.4: ``cognitive.tool_vocabulary``."""
+from __future__ import annotations
+
+from decnet.profiler.behave_shell import extract_session
+from decnet.profiler.behave_shell._parse import AsciinemaEvent
+
+
+def _of(observations: list, primitive: str):  # fetch the single matching observation
+    obs = [o for o in observations if o.primitive == primitive]
+    assert len(obs) == 1, f"expected exactly one {primitive}, got {len(obs)}"
+    return obs[0]
+
+
+def _cmds(tokens: list[str]) -> list[AsciinemaEvent]:  # one typed line per token
+    events: list[AsciinemaEvent] = []
+    for i, tok in enumerate(tokens):
+        t0 = i * 1.0  # token i starts at t = i seconds
+        for j, c in enumerate(tok):
+            events.append((t0 + j * 0.05, "i", c))  # one "i" event per char, 0.05 s apart
+        events.append((t0 + len(tok) * 0.05, "i", "\r"))  # presumably Enter for the parser
+    return events
+
+
+def test_no_commands_no_emission() -> None:
+    out = list(extract_session([(0.0, "i", "x")], sid="tv-empty"))  # lone "x", no "\r"
+    assert [o for o in out if o.primitive == "cognitive.tool_vocabulary"] == []
+
+
+def test_few_distinct_tools_emit_narrow() -> None:
+    out = list(extract_session(
+        _cmds(["ls", "ls", "ps", "ps", "ls", "ps", "ls", "ps"]),  # 8 tokens, 2 distinct
+        sid="tv-narrow",
+    ))
+    obs = _of(out, "cognitive.tool_vocabulary")
+    assert obs.value == "narrow"  # 2 <= TOOL_VOCAB_NARROW_MAX
+
+
+def test_mid_distinct_emit_moderate() -> None:
+    out = list(extract_session(
+        _cmds(["ls", "ps", "id", "uname", "whoami", "pwd"]),  # 6 distinct tokens
+        sid="tv-mod",
+    ))
+    obs = _of(out, "cognitive.tool_vocabulary")
+    assert obs.value == "moderate"  # strictly between the narrow and broad bounds
+
+
+def test_many_distinct_tools_emit_broad() -> None:
+    out = list(extract_session(
+        _cmds(["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"]),  # 11 distinct
+        sid="tv-broad",
+    ))
+    obs = _of(out, "cognitive.tool_vocabulary")
+    assert obs.value == "broad"  # 11 >= TOOL_VOCAB_BROAD_MIN
+
+
+def test_low_sample_count_reduces_confidence() -> None:
+    short = list(extract_session(_cmds(["a", "b"]), sid="tv-short"))
+    full = list(extract_session(_cmds(["a", "b", "c", "d", "e", "f"]), sid="tv-full"))
+    s = _of(short, "cognitive.tool_vocabulary")  # 2 tokens; floor must be > 2
+    f = _of(full, "cognitive.tool_vocabulary")  # 6 tokens; floor must be <= 6
+    assert s.confidence < f.confidence  # NOTE(review): floor = MIN_COMMANDS_FOR_FULL_CONFIDENCE, not shown here