From 842b7de950952fe4005db6c3c49fd9ab47fd0f1f Mon Sep 17 00:00:00 2001 From: anti Date: Sun, 3 May 2026 07:56:49 -0400 Subject: [PATCH] feat(profiler/behave_shell): emit cognitive.inter_command_consistency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BEHAVE-EXTRACTOR.md Phase A Step 8. Dispersion / bimodality of inter-command pauses. HUMAN-bimodal vs LLM-metronomic. * _features/cognitive.py:inter_command_consistency(ctx) emits one Observation in {metronomic, variable, bimodal}. * CV = stdev / mean of ctx.inter_cmd_iats. CV < 0.40 → metronomic (LLM-pure; corpus anchor 0.24); CV ≥ 1.50 → bimodal heuristic (LLM-assisted human; v0.1 placeholder, true bimodal via Hartigan dip is registry-flagged for v0.2); else → variable (human; corpus anchor 0.94). * < 2 IATs or non-positive mean → skip emission. < 5 commands lowers confidence (0.40 vs 0.75) per sample-size honesty. Tests: too-few IATs → no emission, uniform → metronomic, human-like dispersion → variable, extreme bursts+gaps → bimodal, low-sample-count → reduced confidence. Step 8 closes the six-primitive calibration floor for Phase A. Step 9 (calibration grid lockdown) is the gate that pins it. 
--- .../behave_shell/_features/__init__.py | 2 + .../behave_shell/_features/cognitive.py | 39 +++++++++++++ ...est_cognitive_inter_command_consistency.py | 57 +++++++++++++++++++ 3 files changed, 98 insertions(+) create mode 100644 tests/profiler/behave_shell/test_cognitive_inter_command_consistency.py diff --git a/decnet/profiler/behave_shell/_features/__init__.py b/decnet/profiler/behave_shell/_features/__init__.py index 5a90323a..2c7d07f4 100644 --- a/decnet/profiler/behave_shell/_features/__init__.py +++ b/decnet/profiler/behave_shell/_features/__init__.py @@ -14,6 +14,7 @@ from decnet.profiler.behave_shell._ctx import SessionContext from decnet.profiler.behave_shell._features.cognitive import ( command_branch_diversity, feedback_loop_engagement, + inter_command_consistency, inter_command_latency_class, ) from decnet.profiler.behave_shell._features.motor import ( @@ -29,4 +30,5 @@ FEATURES: tuple[FeatureFn, ...] = ( inter_command_latency_class, command_branch_diversity, feedback_loop_engagement, + inter_command_consistency, ) diff --git a/decnet/profiler/behave_shell/_features/cognitive.py b/decnet/profiler/behave_shell/_features/cognitive.py index c5563489..058becd1 100644 --- a/decnet/profiler/behave_shell/_features/cognitive.py +++ b/decnet/profiler/behave_shell/_features/cognitive.py @@ -24,6 +24,8 @@ from decnet.profiler.behave_shell._thresholds import ( INTER_CMD_LLM_LIGHTWEIGHT_MAX, INTER_CMD_TYPING_MAX, MIN_COMMANDS_FOR_FULL_CONFIDENCE, + PAUSE_CV_BIMODAL_MIN, + PAUSE_CV_METRONOMIC_MAX, ) @@ -152,3 +154,40 @@ def feedback_loop_engagement(ctx: SessionContext) -> Iterator[Observation]: value=value, confidence=0.75, ) + + +def inter_command_consistency(ctx: SessionContext) -> Iterator[Observation]: + """Emit ``cognitive.inter_command_consistency``. + + CV (stdev / mean) of inter-command IATs. + + * ``metronomic`` (CV < 0.40) → LLM-pure. Empirical anchor: + LLM-simulated session CV ≈ 0.24 in this corpus. + * ``variable`` (0.40 ≤ CV < 1.50) → human. 
Empirical anchor: + human session CV ≈ 0.94. + * ``bimodal`` (CV ≥ 1.50) → LLM-assisted human, heuristic. v0.1 + uses CV-only; true bimodal detection (Hartigan dip / two-peak) + is filed for v0.2 per the registry's ``notes:`` field. + """ + iats = ctx.inter_cmd_iats + if len(iats) < 2: + return + mean = statistics.fmean(iats) + if mean <= 0.0: + return + cv = statistics.stdev(iats) / mean + if cv < PAUSE_CV_METRONOMIC_MAX: + value = "metronomic" + elif cv >= PAUSE_CV_BIMODAL_MIN: + value = "bimodal" + else: + value = "variable" + confidence = ( + 0.40 if len(ctx.commands) < MIN_COMMANDS_FOR_FULL_CONFIDENCE else 0.75 + ) + yield make_observation( + ctx, + primitive="cognitive.inter_command_consistency", + value=value, + confidence=confidence, + ) diff --git a/tests/profiler/behave_shell/test_cognitive_inter_command_consistency.py b/tests/profiler/behave_shell/test_cognitive_inter_command_consistency.py new file mode 100644 index 00000000..9810736c --- /dev/null +++ b/tests/profiler/behave_shell/test_cognitive_inter_command_consistency.py @@ -0,0 +1,57 @@ +"""Step 8: ``cognitive.inter_command_consistency``.""" +from __future__ import annotations + +from decnet.profiler.behave_shell import extract_session +from decnet.profiler.behave_shell._parse import AsciinemaEvent + + +def _of(observations: list, primitive: str): + obs = [o for o in observations if o.primitive == primitive] + assert len(obs) == 1, f"expected exactly one {primitive}, got {len(obs)}" + return obs[0] + + +def _commands_at(starts: list[float]) -> list[AsciinemaEvent]: + events: list[AsciinemaEvent] = [] + for s in starts: + events.append((s, "i", "x\r")) + return events + + +def test_too_few_iats_no_emission() -> None: + out = list(extract_session(_commands_at([0.0, 1.0]), sid="cv-low")) + assert [o for o in out if o.primitive == "cognitive.inter_command_consistency"] == [] + + +def test_uniform_pace_emits_metronomic() -> None: + # Constant 1s gap → CV 0 + out = list(extract_session( + _commands_at([i 
* 1.0 for i in range(8)]), sid="cv-metro", + )) + obs = _of(out, "cognitive.inter_command_consistency") + assert obs.value == "metronomic" + + +def test_human_like_dispersion_emits_variable() -> None: + # Pauses around 1s mean with CV ≈ 0.9 (human empirical) + starts = [0.0, 0.4, 1.4, 1.6, 4.0, 4.4, 7.5] + out = list(extract_session(_commands_at(starts), sid="cv-var")) + obs = _of(out, "cognitive.inter_command_consistency") + assert obs.value == "variable" + + +def test_extreme_dispersion_emits_bimodal() -> None: + # Mix of very tight bursts and very long gaps → CV well above 1.5 + starts = [0.0, 0.1, 0.2, 30.0, 30.1, 30.2, 60.0] + out = list(extract_session(_commands_at(starts), sid="cv-bi")) + obs = _of(out, "cognitive.inter_command_consistency") + assert obs.value == "bimodal" + + +def test_low_sample_count_reduces_confidence() -> None: + # 3 commands → 2 IATs; below the floor of 5 + short = list(extract_session(_commands_at([0.0, 1.0, 2.0]), sid="cv-short")) + full = list(extract_session(_commands_at([i * 1.0 for i in range(8)]), sid="cv-full")) + s = _of(short, "cognitive.inter_command_consistency") + f = _of(full, "cognitive.inter_command_consistency") + assert s.confidence < f.confidence