diff --git a/decnet/profiler/behave_shell/_features/__init__.py b/decnet/profiler/behave_shell/_features/__init__.py index 5ab5f170..73e847b6 100644 --- a/decnet/profiler/behave_shell/_features/__init__.py +++ b/decnet/profiler/behave_shell/_features/__init__.py @@ -14,6 +14,7 @@ from decnet.profiler.behave_shell._ctx import SessionContext from decnet.profiler.behave_shell._features.cognitive import ( cognitive_load, command_branch_diversity, + error_resilience_fallback_to_man, error_resilience_frustration_typing, error_resilience_retry_tactic, exploration_style, @@ -57,4 +58,5 @@ FEATURES: tuple[FeatureFn, ...] = ( tool_vocabulary, error_resilience_retry_tactic, error_resilience_frustration_typing, + error_resilience_fallback_to_man, ) diff --git a/decnet/profiler/behave_shell/_features/cognitive.py b/decnet/profiler/behave_shell/_features/cognitive.py index 8309ceef..9d077eac 100644 --- a/decnet/profiler/behave_shell/_features/cognitive.py +++ b/decnet/profiler/behave_shell/_features/cognitive.py @@ -15,6 +15,7 @@ from decnet_behave_core.spec.envelope import Observation from decnet.profiler.behave_shell._ctx import SessionContext from decnet.profiler.behave_shell._features._emit import make_observation +from decnet.profiler.behave_shell._parse import hash_token from decnet.profiler.behave_shell._thresholds import ( BRANCH_DIVERSITY_LINEAR_MIN, COGNITIVE_LOAD_CHUNKING_REF_CV, @@ -43,6 +44,19 @@ from decnet.profiler.behave_shell._thresholds import ( ) +# Precomputed at import time so the per-session hot loop is a set +# membership check, not 3 sha256 ops per command. The ``--help`` / +# ``-h`` flag forms can't be detected here — they're not first tokens +# (PII discipline keeps only the *first* token's hash). v0.2 will +# reconsider once corpus calibration justifies storing arg-token +# hashes too. 
_HELP_FAMILY_HASHES: frozenset[str] = frozenset({
    hash_token("man"),
    hash_token("help"),
    hash_token("info"),
})


def error_resilience_fallback_to_man(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.error_resilience.fallback_to_man``.

    For each errored command that has a follow-up command, check whether
    that follow-up is ``man`` / ``help`` / ``info`` — i.e. the operator
    reached for the manual rather than re-trying or pivoting. If at
    least one errored command triggered this fallback → ``present``;
    otherwise ``absent``.

    Emission is skipped when there is nothing to judge:

    * no command errored, or
    * the only errored command is the last one in the session, so the
      operator's reaction to it was never recorded.

    The primitive is binary (no ``unknown`` value), and emitting
    ``absent`` without having observed a single reaction would be
    dishonest.

    Confidence is ``0.40`` when fewer than
    ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` reactions were observed and
    ``0.65`` otherwise. Only errored commands with a follow-up count
    towards that sample size.

    The ``--help`` / ``-h`` flag forms can't fire this primitive in
    v0.1: they aren't first tokens, and only ``first_token_hash`` is
    retained per command (PII discipline). Filed for v0.2.

    Args:
        ctx: Session context; ``ctx.commands`` is read in order and each
            entry must expose ``errored`` and ``first_token_hash``.

    Yields:
        At most one observation for the primitive.
    """
    commands = ctx.commands
    last_index = len(commands) - 1
    # First-token hash of the command typed right after each error. An
    # error at ``last_index`` has no successor and contributes nothing.
    follow_up_hashes = [
        commands[i + 1].first_token_hash
        for i, command in enumerate(commands)
        if command.errored and i < last_index
    ]
    if not follow_up_hashes:
        return

    fell_back = any(h in _HELP_FAMILY_HASHES for h in follow_up_hashes)
    value = "present" if fell_back else "absent"

    # Few observed reactions → weaker evidence either way.
    if len(follow_up_hashes) < MIN_COMMANDS_FOR_FULL_CONFIDENCE:
        confidence = 0.40
    else:
        confidence = 0.65
    yield make_observation(
        ctx,
        primitive="cognitive.error_resilience.fallback_to_man",
        value=value,
        confidence=confidence,
    )
"""Step D.7 tests for ``cognitive.error_resilience.fallback_to_man``."""
from __future__ import annotations

from decnet.profiler.behave_shell import extract_session
from decnet.profiler.behave_shell._parse import AsciinemaEvent


PRIMITIVE = "cognitive.error_resilience.fallback_to_man"


def _of(observations: list, primitive: str):
    """Return the single observation for *primitive*; fail if not exactly one."""
    matching = [item for item in observations if item.primitive == primitive]
    assert len(matching) == 1, f"expected exactly one {primitive}, got {len(matching)}"
    return matching[0]


def _typed(text: str, t0: float, dt: float = 0.05) -> list[AsciinemaEvent]:
    """Build one input (``"i"``) event per character, spaced *dt* apart from *t0*."""
    keystrokes: list[AsciinemaEvent] = []
    for offset, char in enumerate(text):
        keystrokes.append((t0 + offset * dt, "i", char))
    return keystrokes


def _err_then(token: str, response: str, t0: float) -> list[AsciinemaEvent]:
    """Type *token*, emit a "command not found" output, then type *response*."""
    typed_end = t0 + len(token) * 0.05
    failure_output = (typed_end + 0.10, "o", f"bash: {token}: command not found\n")
    return [
        *_typed(f"{token}\r", t0=t0),
        failure_output,
        # The follow-up command starts 1.5 s after the failing one began.
        *_typed(f"{response}\r", t0=t0 + 1.5),
    ]


def _failing_rounds(response: str, rounds: int) -> list[AsciinemaEvent]:
    """Concatenate *rounds* error-then-*response* exchanges, 3 s apart."""
    return [
        event
        for round_no in range(rounds)
        for event in _err_then("foo", response, t0=round_no * 3.0)
    ]


def test_no_errors_no_emission() -> None:
    """A session without any errored command emits nothing for the primitive."""
    events = [*_typed("ls\r", t0=0.0), (0.5, "o", "file1\n")]
    out = list(extract_session(events, sid="ftm-clean"))
    assert all(o.primitive != PRIMITIVE for o in out)


def test_help_after_error_emits_present() -> None:
    """At least one fallback to a help-family token → present."""
    out = list(extract_session(_failing_rounds("man", 5), sid="ftm-present"))
    assert _of(out, PRIMITIVE).value == "present"


def test_pivot_unrelated_emits_absent() -> None:
    """Errors followed by non-help tools → absent."""
    out = list(extract_session(_failing_rounds("ls", 5), sid="ftm-absent"))
    assert _of(out, PRIMITIVE).value == "absent"


def test_info_token_also_counts() -> None:
    """``info`` belongs to the help family just like ``man``."""
    out = list(extract_session(_failing_rounds("info", 5), sid="ftm-info"))
    assert _of(out, PRIMITIVE).value == "present"


def test_low_error_count_reduces_confidence() -> None:
    """Two errored commands yield lower confidence than six."""
    short_out = list(extract_session(_failing_rounds("man", 2), sid="ftm-short"))
    full_out = list(extract_session(_failing_rounds("man", 6), sid="ftm-full"))
    assert _of(short_out, PRIMITIVE).confidence < _of(full_out, PRIMITIVE).confidence