feat(profiler/behave_shell): emit cognitive.error_resilience.frustration_typing
Compares the median within-command IAT of commands that follow an errored command against that of commands that follow a successful one. The relative absolute delta is bucketed into low / moderate / high. Emission is skipped when either group is empty (no errors, or no clean baseline). v0.1; D.8 re-tunes.
This commit is contained in:
@@ -14,6 +14,7 @@ from decnet.profiler.behave_shell._ctx import SessionContext
|
|||||||
from decnet.profiler.behave_shell._features.cognitive import (
|
from decnet.profiler.behave_shell._features.cognitive import (
|
||||||
cognitive_load,
|
cognitive_load,
|
||||||
command_branch_diversity,
|
command_branch_diversity,
|
||||||
|
error_resilience_frustration_typing,
|
||||||
error_resilience_retry_tactic,
|
error_resilience_retry_tactic,
|
||||||
exploration_style,
|
exploration_style,
|
||||||
feedback_loop_engagement,
|
feedback_loop_engagement,
|
||||||
@@ -55,4 +56,5 @@ FEATURES: tuple[FeatureFn, ...] = (
|
|||||||
planning_depth,
|
planning_depth,
|
||||||
tool_vocabulary,
|
tool_vocabulary,
|
||||||
error_resilience_retry_tactic,
|
error_resilience_retry_tactic,
|
||||||
|
error_resilience_frustration_typing,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -25,6 +25,8 @@ from decnet.profiler.behave_shell._thresholds import (
|
|||||||
EXPLORATION_TARGETED_REP_MIN,
|
EXPLORATION_TARGETED_REP_MIN,
|
||||||
FEEDBACK_CORRELATION_MIN,
|
FEEDBACK_CORRELATION_MIN,
|
||||||
FEEDBACK_MIN_PAIRS,
|
FEEDBACK_MIN_PAIRS,
|
||||||
|
FRUSTRATION_LOW_MAX,
|
||||||
|
FRUSTRATION_MODERATE_MAX,
|
||||||
IKI_THINK_MAX_S,
|
IKI_THINK_MAX_S,
|
||||||
INTER_CMD_DELIBERATE_MAX,
|
INTER_CMD_DELIBERATE_MAX,
|
||||||
INTER_CMD_INSTANT_MAX,
|
INTER_CMD_INSTANT_MAX,
|
||||||
@@ -186,6 +188,61 @@ def feedback_loop_engagement(ctx: SessionContext) -> Iterator[Observation]:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def error_resilience_frustration_typing(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.error_resilience.frustration_typing``.

    Every command from the second one onwards is summarised by the
    median of its within-command IATs and filed into one of two groups
    according to whether the *preceding* command errored. The primitive
    buckets the relative absolute delta between the group medians::

        delta = |median_post_error - median_post_success| / median_post_success

    ``delta < FRUSTRATION_LOW_MAX`` yields ``"low"``,
    ``delta < FRUSTRATION_MODERATE_MAX`` yields ``"moderate"``, anything
    else ``"high"``. A large delta means the operator typed differently
    after a failure — speed-up (rage / fluency) or slowdown (caution);
    both are signs of arousal.

    Nothing is emitted when:

    * there are fewer than two commands, or ``ctx.intra_command_iats``
      does not have exactly one entry per command;
    * either group is empty (no errors, or every command errored — no
      clean baseline);
    * the post-success median is not positive (the ratio is undefined).

    Confidence is 0.60 only when *both* groups hold at least
    ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` samples, otherwise 0.40: the
    delta is a ratio of two medians, so a thin baseline is just as
    unreliable as a thin post-error group.
    """
    cmds = ctx.commands
    intra = ctx.intra_command_iats
    if len(cmds) < 2 or len(intra) != len(cmds):
        return

    post_err: list[float] = []
    post_ok: list[float] = []
    # Start at 1: the first command has no predecessor to classify it by.
    for i in range(1, len(cmds)):
        cmd_iats = intra[i]
        if not cmd_iats:
            # No within-command gaps recorded for this command.
            continue
        bucket = post_err if cmds[i - 1].errored else post_ok
        bucket.append(statistics.median(cmd_iats))

    if not post_err or not post_ok:
        return

    median_err = statistics.median(post_err)
    median_ok = statistics.median(post_ok)
    if median_ok <= 0.0:
        # Guard the division below.
        return
    delta = abs(median_err - median_ok) / median_ok

    if delta < FRUSTRATION_LOW_MAX:
        value = "low"
    elif delta < FRUSTRATION_MODERATE_MAX:
        value = "moderate"
    else:
        value = "high"

    # Sample-size honesty: the weaker of the two groups bounds how much
    # the ratio can be trusted.
    smallest_group = min(len(post_err), len(post_ok))
    if smallest_group < MIN_COMMANDS_FOR_FULL_CONFIDENCE:
        confidence = 0.40
    else:
        confidence = 0.60

    yield make_observation(
        ctx,
        primitive="cognitive.error_resilience.frustration_typing",
        value=value,
        confidence=confidence,
    )
|
||||||
|
|
||||||
|
|
||||||
def error_resilience_retry_tactic(ctx: SessionContext) -> Iterator[Observation]:
|
def error_resilience_retry_tactic(ctx: SessionContext) -> Iterator[Observation]:
|
||||||
"""Emit ``cognitive.error_resilience.retry_tactic``.
|
"""Emit ``cognitive.error_resilience.retry_tactic``.
|
||||||
|
|
||||||
|
|||||||
@@ -155,6 +155,21 @@ PLANNING_REACTIVE_MIN: float = 0.50
|
|||||||
TOOL_VOCAB_NARROW_MAX: int = 3
|
TOOL_VOCAB_NARROW_MAX: int = 3
|
||||||
TOOL_VOCAB_BROAD_MIN: int = 10
|
TOOL_VOCAB_BROAD_MIN: int = 10
|
||||||
|
|
||||||
|
# ── cognitive.error_resilience.frustration_typing (Step D.6) ───────────────
# Two medians of within-command IAT are compared: one over commands that
# come right *after* an errored command, one over commands that come right
# after a successful command. The bucketed quantity is their relative
# absolute difference:
#
#     delta = |median_post_error - median_post_success| / median_post_success
#
#     delta < FRUSTRATION_LOW_MAX       → low
#     delta < FRUSTRATION_MODERATE_MAX  → moderate
#     otherwise                         → high
#
# v0.1; D.8 re-tunes.
FRUSTRATION_LOW_MAX: float = 0.10
FRUSTRATION_MODERATE_MAX: float = 0.30
|
||||||
|
|
||||||
# ── motor.keystroke_cadence (Step B.1) ──────────────────────────────────────
|
# ── motor.keystroke_cadence (Step B.1) ──────────────────────────────────────
|
||||||
# Typing bursts split at gaps > IKI_THINK_MAX_S so think-pauses between
|
# Typing bursts split at gaps > IKI_THINK_MAX_S so think-pauses between
|
||||||
# commands don't inflate the within-burst CV. Mirrors the prototype's
|
# commands don't inflate the within-burst CV. Mirrors the prototype's
|
||||||
|
|||||||
@@ -0,0 +1,98 @@
|
|||||||
|
"""Step D.6: ``cognitive.error_resilience.frustration_typing``."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from decnet.profiler.behave_shell import extract_session
|
||||||
|
from decnet.profiler.behave_shell._parse import AsciinemaEvent
|
||||||
|
|
||||||
|
|
||||||
|
PRIMITIVE = "cognitive.error_resilience.frustration_typing"
|
||||||
|
|
||||||
|
|
||||||
|
def _of(observations: list, primitive: str):
    """Return the one observation whose ``primitive`` matches.

    Fails the calling test with an ``AssertionError`` when zero or
    several observations match.
    """
    matches = []
    for candidate in observations:
        if candidate.primitive == primitive:
            matches.append(candidate)
    found = len(matches)
    assert found == 1, f"expected exactly one {primitive}, got {found}"
    (only,) = matches
    return only
|
||||||
|
|
||||||
|
|
||||||
|
def _typed(text: str, t0: float, dt: float) -> list[AsciinemaEvent]:
    """Expand ``text`` into one input (``"i"``) event per character.

    The first character is stamped ``t0``; each following one is ``dt``
    later than the previous.
    """
    keystrokes: list[AsciinemaEvent] = []
    for offset, char in enumerate(text):
        keystrokes.append((t0 + offset * dt, "i", char))
    return keystrokes
|
||||||
|
|
||||||
|
|
||||||
|
def _build(blocks: list[tuple[str, bool, float]]) -> list[AsciinemaEvent]:
    """Synthesise a session from ``(token, errored, dt)`` tuples.

    Command ``i`` starts at ``i * 2.0`` seconds and is typed as
    ``token`` followed by ``"\\r"`` with ``dt`` between keystrokes. One
    output event follows 0.10s after the ``"\\r"`` keystroke: a bash
    "command not found" line when ``errored`` is true, ``"ok"`` otherwise.
    """
    timeline: list[AsciinemaEvent] = []
    for slot, (token, failed, step) in enumerate(blocks):
        start = slot * 2.0
        timeline += _typed(f"{token}\r", t0=start, dt=step)
        # "\r" is the keystroke at index len(token), so this is its timestamp.
        enter_at = start + len(token) * step
        reply = f"bash: {token}: command not found\n" if failed else "ok\n"
        timeline.append((enter_at + 0.10, "o", reply))
    return timeline
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_errors_no_emission() -> None:
    """A session without any errored command must not emit the primitive."""
    events = _build([("ls", False, 0.05)] * 5)
    emitted = [
        o for o in extract_session(events, sid="ft-clean") if o.primitive == PRIMITIVE
    ]
    assert emitted == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_baseline_no_emission() -> None:
    """Every command errored — no clean baseline → skip emission."""
    events = _build([("foo", True, 0.05)] * 5)
    emitted = [
        o for o in extract_session(events, sid="ft-allerr") if o.primitive == PRIMITIVE
    ]
    assert emitted == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_matching_speeds_emit_low() -> None:
    """Same dt for post-error and post-success commands → delta ≈ 0 → low."""
    good = ("ok", False, 0.05)
    bad = ("foo", True, 0.05)
    # Each `bad` is followed by a `good` typed at the same dt, so the
    # post-error and post-success groups see identical cadence.
    blocks = [good, good, bad, good, good, bad, good, good]
    observations = list(extract_session(_build(blocks), sid="ft-low"))
    assert _of(observations, PRIMITIVE).value == "low"
|
||||||
|
|
||||||
|
|
||||||
|
def test_huge_speed_change_emits_high() -> None:
    """Post-error commands typed 4x slower than post-success → delta=3 → high."""
    normal = ("ok", False, 0.05)  # typed at the baseline dt
    failing = ("foo", True, 0.05)
    slowed = ("ok", False, 0.20)  # placed right after an error, 4x slower
    blocks = [normal, normal, failing, slowed, normal, failing, slowed, normal]
    observations = list(extract_session(_build(blocks), sid="ft-high"))
    assert _of(observations, PRIMITIVE).value == "high"
|
||||||
|
|
||||||
|
|
||||||
|
def test_low_post_error_count_reduces_confidence() -> None:
    """A session with one error must score lower confidence than one with six."""
    good = ("ok", False, 0.05)
    bad = ("foo", True, 0.05)
    # One errored command → a single command follows an error.
    short = [good, bad, good, good]
    # Six error/recovery pairs after an initial clean command.
    full_blocks = [good, *([bad, good] * 6)]

    sparse = _of(list(extract_session(_build(short), sid="ft-short")), PRIMITIVE)
    dense = _of(list(extract_session(_build(full_blocks), sid="ft-full")), PRIMITIVE)
    assert sparse.confidence < dense.confidence
|
||||||
Reference in New Issue
Block a user