diff --git a/decnet/profiler/behave_shell/_features/__init__.py b/decnet/profiler/behave_shell/_features/__init__.py
index 906967d6..5ab5f170 100644
--- a/decnet/profiler/behave_shell/_features/__init__.py
+++ b/decnet/profiler/behave_shell/_features/__init__.py
@@ -14,6 +14,7 @@ from decnet.profiler.behave_shell._ctx import SessionContext
 from decnet.profiler.behave_shell._features.cognitive import (
     cognitive_load,
     command_branch_diversity,
+    error_resilience_frustration_typing,
     error_resilience_retry_tactic,
     exploration_style,
     feedback_loop_engagement,
@@ -55,4 +56,5 @@ FEATURES: tuple[FeatureFn, ...] = (
     planning_depth,
     tool_vocabulary,
     error_resilience_retry_tactic,
+    error_resilience_frustration_typing,
 )
diff --git a/decnet/profiler/behave_shell/_features/cognitive.py b/decnet/profiler/behave_shell/_features/cognitive.py
index 4264a183..8309ceef 100644
--- a/decnet/profiler/behave_shell/_features/cognitive.py
+++ b/decnet/profiler/behave_shell/_features/cognitive.py
@@ -25,6 +25,8 @@ from decnet.profiler.behave_shell._thresholds import (
     EXPLORATION_TARGETED_REP_MIN,
     FEEDBACK_CORRELATION_MIN,
     FEEDBACK_MIN_PAIRS,
+    FRUSTRATION_LOW_MAX,
+    FRUSTRATION_MODERATE_MAX,
     IKI_THINK_MAX_S,
     INTER_CMD_DELIBERATE_MAX,
     INTER_CMD_INSTANT_MAX,
@@ -186,6 +188,72 @@ def feedback_loop_engagement(ctx: SessionContext) -> Iterator[Observation]:
     )
 
 
+def error_resilience_frustration_typing(ctx: SessionContext) -> Iterator[Observation]:
+    """Emit ``cognitive.error_resilience.frustration_typing``.
+
+    Compares median within-command IAT for commands *following* an
+    errored command against the same statistic for commands following
+    a successful command. A large relative delta indicates the operator
+    typed differently after a failure — speed-up (rage / fluency) or
+    slowdown (caution); both are signs of arousal.
+
+    The first command has no predecessor and is never sampled; commands
+    with no recorded IATs are skipped.
+
+    Skip emission when there are fewer than two commands, when
+    ``ctx.intra_command_iats`` does not line up one-to-one with
+    ``ctx.commands``, when either group is empty (no errors, or every
+    command errored — no clean baseline), or when the post-success
+    median is not positive (the relative delta would be undefined).
+
+    Confidence is 0.60, lowered to 0.40 when fewer than
+    ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` post-error samples exist.
+    """
+    post_err: list[float] = []
+    post_ok: list[float] = []
+    cmds = ctx.commands
+    intra = ctx.intra_command_iats
+    if len(cmds) < 2 or len(intra) != len(cmds):
+        return
+    # Start at 1: each sample is bucketed by the outcome of the command
+    # typed immediately before it.
+    for i in range(1, len(cmds)):
+        cmd_iats = intra[i]
+        if not cmd_iats:
+            continue
+        m = statistics.median(cmd_iats)
+        if cmds[i - 1].errored:
+            post_err.append(m)
+        else:
+            post_ok.append(m)
+    if not post_err or not post_ok:
+        return
+    median_err = statistics.median(post_err)
+    median_ok = statistics.median(post_ok)
+    if median_ok <= 0.0:
+        return
+    # Absolute value: speed-up and slowdown both count as a change.
+    delta = abs(median_err - median_ok) / median_ok
+
+    if delta < FRUSTRATION_LOW_MAX:
+        value = "low"
+    elif delta < FRUSTRATION_MODERATE_MAX:
+        value = "moderate"
+    else:
+        value = "high"
+
+    if len(post_err) < MIN_COMMANDS_FOR_FULL_CONFIDENCE:
+        confidence = 0.40
+    else:
+        confidence = 0.60
+    yield make_observation(
+        ctx,
+        primitive="cognitive.error_resilience.frustration_typing",
+        value=value,
+        confidence=confidence,
+    )
+
+
 def error_resilience_retry_tactic(ctx: SessionContext) -> Iterator[Observation]:
     """Emit ``cognitive.error_resilience.retry_tactic``.
 
diff --git a/decnet/profiler/behave_shell/_thresholds.py b/decnet/profiler/behave_shell/_thresholds.py
index 88a945e1..6249e296 100644
--- a/decnet/profiler/behave_shell/_thresholds.py
+++ b/decnet/profiler/behave_shell/_thresholds.py
@@ -155,6 +155,25 @@ PLANNING_REACTIVE_MIN: float = 0.50
 TOOL_VOCAB_NARROW_MAX: int = 3
 TOOL_VOCAB_BROAD_MIN: int = 10
 
+# ── cognitive.error_resilience.frustration_typing (Step D.6) ───────────────
+# Compare the median within-command IAT of commands *following* an
+# errored command against the same statistic for commands following a
+# successful command. The relative absolute delta:
+#
+#   delta = |median_post_error - median_post_success| / median_post_success
+#
+#   delta < FRUSTRATION_LOW_MAX       → low
+#   delta < FRUSTRATION_MODERATE_MAX  → moderate
+#   else                              → high
+#
+# Normalising by the post-success median makes the scale asymmetric:
+# with non-negative IATs a speed-up can reach at most delta = 1.0,
+# while a slowdown is unbounded.
+#
+# v0.1; D.8 re-tunes.
+FRUSTRATION_LOW_MAX: float = 0.10
+FRUSTRATION_MODERATE_MAX: float = 0.30
+
 # ── motor.keystroke_cadence (Step B.1) ──────────────────────────────────────
 # Typing bursts split at gaps > IKI_THINK_MAX_S so think-pauses between
 # commands don't inflate the within-burst CV. Mirrors the prototype's
diff --git a/tests/profiler/behave_shell/test_cognitive_error_resilience_frustration_typing.py b/tests/profiler/behave_shell/test_cognitive_error_resilience_frustration_typing.py
new file mode 100644
index 00000000..5f98c487
--- /dev/null
+++ b/tests/profiler/behave_shell/test_cognitive_error_resilience_frustration_typing.py
@@ -0,0 +1,98 @@
+"""Step D.6: ``cognitive.error_resilience.frustration_typing``."""
+from __future__ import annotations
+
+from decnet.profiler.behave_shell import extract_session
+from decnet.profiler.behave_shell._parse import AsciinemaEvent
+
+
+PRIMITIVE = "cognitive.error_resilience.frustration_typing"
+
+
+def _of(observations: list, primitive: str):
+    obs = [o for o in observations if o.primitive == primitive]
+    assert len(obs) == 1, f"expected exactly one {primitive}, got {len(obs)}"
+    return obs[0]
+
+
+def _typed(text: str, t0: float, dt: float) -> list[AsciinemaEvent]:
+    return [(t0 + i * dt, "i", c) for i, c in enumerate(text)]
+
+
+def _build(blocks: list[tuple[str, bool, float]]) -> list[AsciinemaEvent]:
+    """Synthesise a session.
+
+    ``blocks`` is a list of (token, errored, dt) tuples. Each command
+    gets its own time slot 2s apart; ``dt`` is the within-command IAT.
+    Output (a ``command not found`` error or ``ok``) lands 0.10s after
+    the Enter keystroke.
+    """
+    events: list[AsciinemaEvent] = []
+    for i, (tok, errored, dt) in enumerate(blocks):
+        t0 = i * 2.0
+        events.extend(_typed(f"{tok}\r", t0=t0, dt=dt))
+        # "\r" sits at index len(tok), so this is the Enter timestamp.
+        cmd_end = t0 + len(tok) * dt
+        output = f"bash: {tok}: command not found\n" if errored else "ok\n"
+        events.append((cmd_end + 0.10, "o", output))
+    return events
+
+
+def test_no_errors_no_emission() -> None:
+    out = list(extract_session(_build([("ls", False, 0.05)] * 5), sid="ft-clean"))
+    assert [o for o in out if o.primitive == PRIMITIVE] == []
+
+
+def test_no_baseline_no_emission() -> None:
+    """Every command errored — no clean baseline → skip emission."""
+    out = list(extract_session(_build([("foo", True, 0.05)] * 5), sid="ft-allerr"))
+    assert [o for o in out if o.primitive == PRIMITIVE] == []
+
+
+def test_matching_speeds_emit_low() -> None:
+    """Same dt for post-error and post-success commands → delta ≈ 0 → low."""
+    blocks = [
+        ("ok", False, 0.05),
+        ("ok", False, 0.05),
+        ("foo", True, 0.05),
+        ("ok", False, 0.05),  # post-err: dt=0.05
+        ("ok", False, 0.05),  # post-ok: dt=0.05
+        ("foo", True, 0.05),
+        ("ok", False, 0.05),  # post-err: dt=0.05
+        ("ok", False, 0.05),
+    ]
+    out = list(extract_session(_build(blocks), sid="ft-low"))
+    obs = _of(out, PRIMITIVE)
+    assert obs.value == "low"
+
+
+def test_huge_speed_change_emits_high() -> None:
+    """Post-error commands typed 4x slower than post-success → delta=3 → high."""
+    blocks = [
+        ("ok", False, 0.05),
+        ("ok", False, 0.05),  # post-ok: dt=0.05
+        ("foo", True, 0.05),
+        ("ok", False, 0.20),  # post-err: dt=0.20 (4x slower)
+        ("ok", False, 0.05),  # post-ok: dt=0.05
+        ("foo", True, 0.05),
+        ("ok", False, 0.20),
+        ("ok", False, 0.05),
+    ]
+    out = list(extract_session(_build(blocks), sid="ft-high"))
+    obs = _of(out, PRIMITIVE)
+    assert obs.value == "high"
+
+
+def test_low_post_error_count_reduces_confidence() -> None:
+    short = [
+        ("ok", False, 0.05),
+        ("foo", True, 0.05),
+        ("ok", False, 0.05),
+        ("ok", False, 0.05),
+    ]
+    full_blocks = [("ok", False, 0.05)]
+    for _ in range(6):
+        full_blocks.append(("foo", True, 0.05))
+        full_blocks.append(("ok", False, 0.05))
+    s = _of(list(extract_session(_build(short), sid="ft-short")), PRIMITIVE)
+    f = _of(list(extract_session(_build(full_blocks), sid="ft-full")), PRIMITIVE)
+    assert s.confidence < f.confidence