feat(profiler/behave_shell): emit cognitive.planning_depth

Distribution of inter-command IATs bucketed against IKI_THINK_MAX_S (deep) and INTER_CMD_INSTANT_MAX (reactive); fall-through is shallow. v0.1 thresholds; D.8 re-tunes.
2026-05-03 23:55:16 -04:00
parent 2254651270
commit 6c2e4ada83
4 changed files with 123 additions and 0 deletions
--- a/decnet/profiler/behave_shell/_features/__init__.py
+++ b/decnet/profiler/behave_shell/_features/__init__.py
@@ -16,6 +16,7 @@ from decnet.profiler.behave_shell._features.cognitive import (
    command_branch_diversity,
    exploration_style,
    feedback_loop_engagement,
    planning_depth,
    inter_command_consistency,
    inter_command_latency_class,
 )
@@ -49,4 +50,5 @@ FEATURES: tuple[FeatureFn, ...] = (
    inter_command_consistency,
    cognitive_load,
    exploration_style,
    planning_depth,
 )
--- a/decnet/profiler/behave_shell/_features/cognitive.py
+++ b/decnet/profiler/behave_shell/_features/cognitive.py
@@ -25,6 +25,7 @@ from decnet.profiler.behave_shell._thresholds import (
    EXPLORATION_TARGETED_REP_MIN,
    FEEDBACK_CORRELATION_MIN,
    FEEDBACK_MIN_PAIRS,
    IKI_THINK_MAX_S,
    INTER_CMD_DELIBERATE_MAX,
    INTER_CMD_INSTANT_MAX,
    INTER_CMD_LLM_HEAVYWEIGHT_MAX,
@@ -33,6 +34,8 @@ from decnet.profiler.behave_shell._thresholds import (
    MIN_COMMANDS_FOR_FULL_CONFIDENCE,
    PAUSE_CV_BIMODAL_MIN,
    PAUSE_CV_METRONOMIC_MAX,
    PLANNING_DEEP_MIN,
    PLANNING_REACTIVE_MIN,
 )
@@ -181,6 +184,50 @@ def feedback_loop_engagement(ctx: SessionContext) -> Iterator[Observation]:
    )
 def planning_depth(ctx: SessionContext) -> Iterator[Observation]:
    """Classify planning depth from the session's inter-command gaps.

    Yields one ``cognitive.planning_depth`` observation whose value is
    ``deep``, ``reactive`` or ``shallow``:

    * ``deep`` when the share of gaps strictly above ``IKI_THINK_MAX_S``
      reaches ``PLANNING_DEEP_MIN`` (the operator pauses to think
      between commands);
    * ``reactive`` when the session is not ``deep`` and the share of gaps
      at or below ``INTER_CMD_INSTANT_MAX`` reaches
      ``PLANNING_REACTIVE_MIN`` (knee-jerk pacing);
    * ``shallow`` in every other case.

    ``deep`` is tested first, so it wins when both shares clear their
    thresholds.

    Nothing is yielded when ``ctx.inter_cmd_iats`` is empty, because the
    registry has no ``unknown`` value for this primitive.

    Confidence is 0.40 when the session has fewer than
    ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` commands and 0.65 otherwise.
    """
    gaps = ctx.inter_cmd_iats
    if not gaps:
        return
    total = len(gaps)
    think_pauses = len([gap for gap in gaps if gap > IKI_THINK_MAX_S])
    instant_pauses = len([gap for gap in gaps if gap <= INTER_CMD_INSTANT_MAX])
    # Order matters: the deep test is evaluated before the reactive one.
    if think_pauses / total >= PLANNING_DEEP_MIN:
        label = "deep"
    elif instant_pauses / total >= PLANNING_REACTIVE_MIN:
        label = "reactive"
    else:
        label = "shallow"
    enough_commands = len(ctx.commands) >= MIN_COMMANDS_FOR_FULL_CONFIDENCE
    yield make_observation(
        ctx,
        primitive="cognitive.planning_depth",
        value=label,
        confidence=0.65 if enough_commands else 0.40,
    )
 def exploration_style(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.exploration_style`` ∈ {methodical, chaotic, targeted}.
--- a/decnet/profiler/behave_shell/_thresholds.py
+++ b/decnet/profiler/behave_shell/_thresholds.py
@@ -128,6 +128,19 @@ COGNITIVE_LOAD_MEDIUM_MAX: float = 0.67
 EXPLORATION_TARGETED_REP_MIN: float = 0.50
 EXPLORATION_CHAOTIC_BACKTRACK_MIN: float = 0.30
 # ── cognitive.planning_depth (Step D.3) ────────────────────────────────────
 # Distribution of inter-command IATs (N = number of inter-command IATs).
 #   deep_pause_fraction      = (count of inter_cmd_iats > IKI_THINK_MAX_S) / N
 #   reactive_pause_fraction  = (count of inter_cmd_iats <= INTER_CMD_INSTANT_MAX) / N
 # Rules are tried top to bottom; the first match wins (deep beats reactive).
 #   deep_pause_fraction      >= PLANNING_DEEP_MIN     → deep
 #   reactive_pause_fraction  >= PLANNING_REACTIVE_MIN → reactive
 #   otherwise                                         → shallow
 #
 # v0.1; D.8 re-tunes once D.1-D.7 are stable.
 PLANNING_DEEP_MIN: float = 0.40
 PLANNING_REACTIVE_MIN: float = 0.50
 # ── motor.keystroke_cadence (Step B.1) ──────────────────────────────────────
 # Typing bursts split at gaps > IKI_THINK_MAX_S so think-pauses between
 # commands don't inflate the within-burst CV. Mirrors the prototype's
--- a/tests/profiler/behave_shell/test_cognitive_planning_depth.py
+++ b/tests/profiler/behave_shell/test_cognitive_planning_depth.py
@@ -0,0 +1,61 @@
 """Step D.3: ``cognitive.planning_depth``."""
 from __future__ import annotations
 from decnet.profiler.behave_shell import extract_session
 from decnet.profiler.behave_shell._parse import AsciinemaEvent
 def _of(observations: list, primitive: str):
    """Return the single observation whose ``primitive`` equals *primitive*.

    Raises ``AssertionError`` unless exactly one observation matches.
    """
    matches = list(filter(lambda item: item.primitive == primitive, observations))
    count = len(matches)
    assert count == 1, f"expected exactly one {primitive}, got {count}"
    return matches[0]
 def _commands_at(starts: list[float]) -> list[AsciinemaEvent]:
    """Build one minimal event per timestamp in *starts*.

    Each timestamp becomes a three-item tuple: the timestamp, the code
    ``"i"``, and the payload ``x`` followed by a carriage return.
    NOTE(review): ``"i"`` is presumably the input-stream marker and the
    carriage return the command terminator; confirm against ``_parse``.
    """
    return [(start, "i", "x\r") for start in starts]
 def test_no_inter_cmd_iats_no_emission() -> None:
    """A session built from a single event must not emit planning_depth."""
    session = extract_session(_commands_at([0.0]), sid="pd-empty")
    emitted = [observation.primitive for observation in session]
    assert "cognitive.planning_depth" not in emitted
 def test_long_pauses_emit_deep() -> None:
    """Eight events spaced 3 s apart should be classified as deep."""
    starts = [3.0 * step for step in range(8)]
    observations = list(extract_session(_commands_at(starts), sid="pd-deep"))
    assert _of(observations, "cognitive.planning_depth").value == "deep"
 def test_sub_instant_pauses_emit_reactive() -> None:
    """Eight events spaced 0.10 s apart should be classified as reactive."""
    starts = [step * 0.10 for step in range(8)]
    observations = list(extract_session(_commands_at(starts), sid="pd-react"))
    assert _of(observations, "cognitive.planning_depth").value == "reactive"
 def test_typing_speed_pauses_emit_shallow() -> None:
    """Eight events spaced 1 s apart should fall through to shallow."""
    starts = [float(step) for step in range(8)]
    observations = list(extract_session(_commands_at(starts), sid="pd-shallow"))
    assert _of(observations, "cognitive.planning_depth").value == "shallow"
 def test_low_sample_count_reduces_confidence() -> None:
    """A three-event session must score lower confidence than an eight-event one."""
    sparse_run = list(extract_session(_commands_at([0.0, 1.0, 2.0]), sid="pd-short"))
    dense_run = list(
        extract_session(_commands_at([float(step) for step in range(8)]), sid="pd-full")
    )
    sparse_obs = _of(sparse_run, "cognitive.planning_depth")
    dense_obs = _of(dense_run, "cognitive.planning_depth")
    assert sparse_obs.confidence < dense_obs.confidence