feat(profiler/behave_shell): emit cognitive.inter_command_latency_class
BEHAVE-EXTRACTOR.md Phase A Step 5. Classifies the operator's
thinking pace between commands. Splits LW-sim / CLAUDE-FF /
CLAUDE-CL.
* _features/cognitive.py:inter_command_latency_class(ctx) emits at most
one Observation whose value is one of {instant, typing_speed,
deliberate, llm_lightweight, llm_heavyweight, long}. The value is
chosen by bucketing the median of ctx.inter_cmd_iats against the
prototype thresholds (v0.2 split: lightweight 2-8s, heavyweight
8-30s); nothing is emitted when there are no IATs.
* Sample-size honesty: < 5 commands halves confidence (0.40 vs
0.80) per BEHAVE-EXTRACTOR.md.
* Threshold consts (INTER_CMD_*_MAX, MIN_COMMANDS_FOR_FULL_CONFIDENCE,
plus parked Step 6/7/8 thresholds for the next three commits)
added to _thresholds.py.
Tests cover all six buckets at empirically-anchored IATs (15s ≈
Claude Opus driving recon via tmux send-keys), plus the
single-command no-IAT and low-sample-count paths.
This commit is contained in:
@@ -11,6 +11,9 @@ from typing import Callable, Iterable
|
|||||||
from decnet_behave_core.spec.envelope import Observation
|
from decnet_behave_core.spec.envelope import Observation
|
||||||
|
|
||||||
from decnet.profiler.behave_shell._ctx import SessionContext
|
from decnet.profiler.behave_shell._ctx import SessionContext
|
||||||
|
from decnet.profiler.behave_shell._features.cognitive import (
|
||||||
|
inter_command_latency_class,
|
||||||
|
)
|
||||||
from decnet.profiler.behave_shell._features.motor import (
|
from decnet.profiler.behave_shell._features.motor import (
|
||||||
input_modality,
|
input_modality,
|
||||||
paste_burst_rate,
|
paste_burst_rate,
|
||||||
@@ -21,4 +24,5 @@ FeatureFn = Callable[[SessionContext], Iterable[Observation]]
|
|||||||
FEATURES: tuple[FeatureFn, ...] = (
|
FEATURES: tuple[FeatureFn, ...] = (
|
||||||
input_modality,
|
input_modality,
|
||||||
paste_burst_rate,
|
paste_burst_rate,
|
||||||
|
inter_command_latency_class,
|
||||||
)
|
)
|
||||||
|
|||||||
61
decnet/profiler/behave_shell/_features/cognitive.py
Normal file
61
decnet/profiler/behave_shell/_features/cognitive.py
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
"""``cognitive.*`` feature functions.
|
||||||
|
|
||||||
|
Step 5: ``cognitive.inter_command_latency_class``.
|
||||||
|
Step 6: ``cognitive.command_branch_diversity``.
|
||||||
|
Step 7: ``cognitive.feedback_loop_engagement``.
|
||||||
|
Step 8: ``cognitive.inter_command_consistency``.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import statistics
|
||||||
|
from typing import Iterator
|
||||||
|
|
||||||
|
from decnet_behave_core.spec.envelope import Observation
|
||||||
|
|
||||||
|
from decnet.profiler.behave_shell._ctx import SessionContext
|
||||||
|
from decnet.profiler.behave_shell._features._emit import make_observation
|
||||||
|
from decnet.profiler.behave_shell._thresholds import (
|
||||||
|
INTER_CMD_DELIBERATE_MAX,
|
||||||
|
INTER_CMD_INSTANT_MAX,
|
||||||
|
INTER_CMD_LLM_HEAVYWEIGHT_MAX,
|
||||||
|
INTER_CMD_LLM_LIGHTWEIGHT_MAX,
|
||||||
|
INTER_CMD_TYPING_MAX,
|
||||||
|
MIN_COMMANDS_FOR_FULL_CONFIDENCE,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _bucket_inter_cmd_latency(median_iat: float) -> str:
    """Map a median inter-command IAT onto its latency-class label.

    The value is compared against the ``INTER_CMD_*_MAX`` edges in the
    order listed below; the first edge that is greater than or equal to
    the value selects the label, so every upper bound is inclusive.

    Args:
        median_iat: Median inter-command inter-arrival time.

    Returns:
        One of ``"instant"``, ``"typing_speed"``, ``"deliberate"``,
        ``"llm_lightweight"``, ``"llm_heavyweight"`` or ``"long"``; the
        last one is the fall-through when no edge matches.
    """
    # (inclusive upper bound, label) pairs, checked top to bottom.
    bands = (
        (INTER_CMD_INSTANT_MAX, "instant"),
        (INTER_CMD_TYPING_MAX, "typing_speed"),
        (INTER_CMD_DELIBERATE_MAX, "deliberate"),
        (INTER_CMD_LLM_LIGHTWEIGHT_MAX, "llm_lightweight"),
        (INTER_CMD_LLM_HEAVYWEIGHT_MAX, "llm_heavyweight"),
    )
    for upper_bound, label in bands:
        if median_iat <= upper_bound:
            return label
    return "long"
|
||||||
|
|
||||||
|
|
||||||
|
def inter_command_latency_class(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.inter_command_latency_class``.

    Operator's *thinking pace* between commands, bucketed against
    calibrated thresholds. Splits LW-sim / CLAUDE-FF / CLAUDE-CL.

    Args:
        ctx: Session context; ``ctx.inter_cmd_iats`` and ``ctx.commands``
            are read.

    Yields:
        Nothing when ``ctx.inter_cmd_iats`` is empty. Otherwise exactly
        one observation whose value is the bucket of the median IAT,
        with confidence 0.80, or 0.40 when the session holds fewer than
        ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` commands.
    """
    iats = ctx.inter_cmd_iats
    if not iats:
        # No inter-command gaps recorded, so there is nothing to classify.
        return

    bucket = _bucket_inter_cmd_latency(statistics.median(iats))

    # Sample-size honesty: too few commands → halve confidence.
    enough_commands = len(ctx.commands) >= MIN_COMMANDS_FOR_FULL_CONFIDENCE
    confidence = 0.80 if enough_commands else 0.40

    yield make_observation(
        ctx,
        primitive="cognitive.inter_command_latency_class",
        value=bucket,
        confidence=confidence,
    )
|
||||||
@@ -35,3 +35,46 @@ MODALITY_TYPED_MAX: float = 0.05
|
|||||||
# habit signal, input_modality is the dominant-channel signal.
|
# habit signal, input_modality is the dominant-channel signal.
|
||||||
PASTE_RATE_HABITUAL_MIN: float = 0.50
|
PASTE_RATE_HABITUAL_MIN: float = 0.50
|
||||||
PASTE_RATE_OCCASIONAL_MIN: float = 0.10
|
PASTE_RATE_OCCASIONAL_MIN: float = 0.10
|
||||||
|
|
||||||
|
# ── cognitive.inter_command_latency_class (Step 5) ──────────────────────────
|
||||||
|
# Bucket edges (seconds) for the median inter-command IAT. Prototype
# values; v0.2 splits the original llm_roundtrip class (2-8s band) into
# llm_lightweight (2-8s: orchestrated agents w/ small models / terse
# prompts) and llm_heavyweight (8-30s: reasoning-class agents in tool
# loops with text generation between calls). Empirical anchor: Claude
# Opus driving recon via tmux send-keys produced a median of 15.5s.
|
||||||
|
INTER_CMD_INSTANT_MAX: float = 0.30
|
||||||
|
INTER_CMD_TYPING_MAX: float = 1.50
|
||||||
|
INTER_CMD_DELIBERATE_MAX: float = 2.00
|
||||||
|
INTER_CMD_LLM_LIGHTWEIGHT_MAX: float = 8.00
|
||||||
|
INTER_CMD_LLM_HEAVYWEIGHT_MAX: float = 30.00
|
||||||
|
|
||||||
|
# Sample-size floor for inter-command IAT primitives. Below this we
|
||||||
|
# halve the confidence per BEHAVE-EXTRACTOR.md "sample-size honesty".
|
||||||
|
MIN_COMMANDS_FOR_FULL_CONFIDENCE: int = 5
|
||||||
|
|
||||||
|
# ── cognitive.command_branch_diversity (Step 6) ─────────────────────────────
|
||||||
|
# unique_first_tokens / total_commands ratio. Empirical (CLAUDE-FF vs
|
||||||
|
# CLAUDE-CL on 2026-05-02): fire-and-forget runs ~10 distinct tools (ratio
|
||||||
|
# near 1.0) → linear_playbook; closed-loop runs ~5-6 tools with the same
|
||||||
|
# tool re-invoked → adaptive_branching.
|
||||||
|
BRANCH_DIVERSITY_LINEAR_MIN: float = 0.80 # >= → linear_playbook
|
||||||
|
BRANCH_DIVERSITY_ADAPTIVE_MAX: float = 0.60 # <= → adaptive_branching
|
||||||
|
# Between is the ambiguous middle band — bias toward adaptive (the
|
||||||
|
# operator is reusing tools).
|
||||||
|
|
||||||
|
# ── cognitive.feedback_loop_engagement (Step 7) ─────────────────────────────
|
||||||
|
# Pearson r threshold for "the operator's pause grew with the volume of
|
||||||
|
# preceding output". |r| > this → significant; sign carries direction.
|
||||||
|
FEEDBACK_CORRELATION_MIN: float = 0.30
|
||||||
|
# Need at least this many (output_bytes, next_pause) pairs to even
|
||||||
|
# attempt a correlation. Below this the answer is "unknown".
|
||||||
|
FEEDBACK_MIN_PAIRS: int = 5
|
||||||
|
|
||||||
|
# ── cognitive.inter_command_consistency (Step 8) ────────────────────────────
|
||||||
|
# CV (stdev / mean) of inter-command IATs. Empirical (this corpus):
|
||||||
|
# human session CV=0.94 → variable; LLM-simulated CV=0.24 → metronomic;
|
||||||
|
# anything beyond 1.5 is heuristically "bimodal" (real bimodal detection
|
||||||
|
# via Hartigan dip is filed for v0.2).
|
||||||
|
PAUSE_CV_METRONOMIC_MAX: float = 0.40
|
||||||
|
PAUSE_CV_BIMODAL_MIN: float = 1.50
|
||||||
|
|||||||
@@ -0,0 +1,81 @@
|
|||||||
|
"""Step 5: ``cognitive.inter_command_latency_class``."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from decnet.profiler.behave_shell import extract_session
|
||||||
|
from decnet.profiler.behave_shell._parse import AsciinemaEvent
|
||||||
|
|
||||||
|
|
||||||
|
def _of(observations: list, primitive: str):
    """Return the single observation in *observations* named *primitive*.

    Raises ``AssertionError`` (failing the calling test) unless exactly
    one observation has a matching ``primitive`` attribute.
    """
    matches = [o for o in observations if o.primitive == primitive]
    assert len(matches) == 1, f"expected exactly one {primitive}, got {len(matches)}"
    return matches[0]
|
||||||
|
|
||||||
|
|
||||||
|
def _command_stream(starts: list[float]) -> list[AsciinemaEvent]:
    """Build an input stream that yields commands at the given start times.

    Each start time ``s`` contributes two ``"i"`` events, in order: the
    keystroke ``"x"`` at ``s`` and a carriage return at ``s + 0.05``.
    """
    events: list[AsciinemaEvent] = []
    for start in starts:
        events.extend(((start, "i", "x"), (start + 0.05, "i", "\r")))
    return events
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_commands_means_no_observation() -> None:
    """An empty event stream must not yield the latency-class primitive."""
    out = list(extract_session([], sid="lat-empty"))
    assert [o for o in out if o.primitive == "cognitive.inter_command_latency_class"] == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_single_command_no_iat_no_observation() -> None:
    """A single command must not yield the latency-class primitive."""
    out = list(extract_session(_command_stream([0.0]), sid="lat-1"))
    assert [o for o in out if o.primitive == "cognitive.inter_command_latency_class"] == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_instant_bucket() -> None:
    """Six commands started 0.15s apart classify as ``instant``."""
    # IATs of 0.1s — well under 0.30 cap
    starts = [i * 0.15 for i in range(6)]
    out = list(extract_session(_command_stream(starts), sid="lat-instant"))
    assert _of(out, "cognitive.inter_command_latency_class").value == "instant"
|
||||||
|
|
||||||
|
|
||||||
|
def test_typing_speed_bucket() -> None:
    """Six commands started 1.0s apart classify as ``typing_speed``."""
    # IATs around 1.0s
    starts = [i * 1.0 for i in range(6)]
    out = list(extract_session(_command_stream(starts), sid="lat-typing"))
    assert _of(out, "cognitive.inter_command_latency_class").value == "typing_speed"
|
||||||
|
|
||||||
|
|
||||||
|
def test_deliberate_bucket() -> None:
    """Six commands started 1.9s apart classify as ``deliberate``."""
    # IATs around 1.85s — above typing (1.5), under deliberate cap (2.0)
    starts = [i * 1.9 for i in range(6)]
    out = list(extract_session(_command_stream(starts), sid="lat-deliberate"))
    assert _of(out, "cognitive.inter_command_latency_class").value == "deliberate"
|
||||||
|
|
||||||
|
|
||||||
|
def test_llm_lightweight_bucket() -> None:
    """Six commands started 5.05s apart classify as ``llm_lightweight``."""
    # IATs around 5s — within 2-8s band
    starts = [i * 5.05 for i in range(6)]
    out = list(extract_session(_command_stream(starts), sid="lat-lwt"))
    assert _of(out, "cognitive.inter_command_latency_class").value == "llm_lightweight"
|
||||||
|
|
||||||
|
|
||||||
|
def test_llm_heavyweight_bucket() -> None:
    """Six commands started 15.05s apart classify as ``llm_heavyweight``."""
    # IATs around 15s — within 8-30s band; matches Claude Opus empirical
    starts = [i * 15.05 for i in range(6)]
    out = list(extract_session(_command_stream(starts), sid="lat-hvy"))
    assert _of(out, "cognitive.inter_command_latency_class").value == "llm_heavyweight"
|
||||||
|
|
||||||
|
|
||||||
|
def test_long_bucket() -> None:
    """Six commands started 60s apart classify as ``long``."""
    # IATs > 30s
    starts = [i * 60.0 for i in range(6)]
    out = list(extract_session(_command_stream(starts), sid="lat-long"))
    assert _of(out, "cognitive.inter_command_latency_class").value == "long"
|
||||||
|
|
||||||
|
|
||||||
|
def test_low_sample_count_reduces_confidence() -> None:
    """A two-command session reports lower confidence than a six-command one."""
    # 2 commands → 1 IAT; below the floor
    short = list(extract_session(_command_stream([0.0, 1.0]), sid="lat-low"))
    full = list(extract_session(_command_stream([i * 1.0 for i in range(6)]), sid="lat-full"))
    s = _of(short, "cognitive.inter_command_latency_class")
    f = _of(full, "cognitive.inter_command_latency_class")
    assert s.confidence < f.confidence
|
||||||
Reference in New Issue
Block a user