From b704352783fa83f2e585a7f503da2083cf5831ae Mon Sep 17 00:00:00 2001
From: anti
Date: Sun, 3 May 2026 23:58:00 -0400
Subject: [PATCH] feat(profiler/behave_shell): emit cognitive.error_resilience.retry_tactic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Modal response across Command.errored=True commands:

  * same first_token_hash on next command → rerun
  * different first_token_hash → switch
  * no next command → abort

Tiebreak in registry order. The fourth registry value 'modify'
requires within-command arg diffing (PII boundary); deferred to v0.2.
---
 .../behave_shell/_features/__init__.py        |  2 +
 .../behave_shell/_features/cognitive.py       | 53 ++++++++++++
 ...cognitive_error_resilience_retry_tactic.py | 85 +++++++++++++++++++
 3 files changed, 140 insertions(+)
 create mode 100644 tests/profiler/behave_shell/test_cognitive_error_resilience_retry_tactic.py

diff --git a/decnet/profiler/behave_shell/_features/__init__.py b/decnet/profiler/behave_shell/_features/__init__.py
index b3d357bb..906967d6 100644
--- a/decnet/profiler/behave_shell/_features/__init__.py
+++ b/decnet/profiler/behave_shell/_features/__init__.py
@@ -14,6 +14,7 @@ from decnet.profiler.behave_shell._ctx import SessionContext
 from decnet.profiler.behave_shell._features.cognitive import (
     cognitive_load,
     command_branch_diversity,
+    error_resilience_retry_tactic,
     exploration_style,
     feedback_loop_engagement,
     planning_depth,
@@ -53,4 +54,5 @@ FEATURES: tuple[FeatureFn, ...] = (
     exploration_style,
     planning_depth,
     tool_vocabulary,
+    error_resilience_retry_tactic,
 )
diff --git a/decnet/profiler/behave_shell/_features/cognitive.py b/decnet/profiler/behave_shell/_features/cognitive.py
index cac925dc..4264a183 100644
--- a/decnet/profiler/behave_shell/_features/cognitive.py
+++ b/decnet/profiler/behave_shell/_features/cognitive.py
@@ -186,6 +186,59 @@ def feedback_loop_engagement(ctx: SessionContext) -> Iterator[Observation]:
     )
 
 
+def error_resilience_retry_tactic(ctx: SessionContext) -> Iterator[Observation]:
+    """Emit ``cognitive.error_resilience.retry_tactic``.
+
+    For each command with ``Command.errored=True``, classify the
+    operator's response by the *next* command:
+
+    * **rerun** — same first_token_hash as the errored command. The
+      operator re-invoked the same tool (often after fixing args
+      mid-edit, but we can't see args).
+    * **switch** — different first_token_hash. Pivoted to a different
+      tool.
+    * **abort** — no next command. Session ended after the error.
+
+    The session's reported tactic is the **modal** response across all
+    errored commands (with ties broken in registry order: rerun >
+    modify > switch > abort). Skip emission entirely when no commands
+    errored — the registry has no ``unknown`` here, and silence is the
+    most honest answer.
+
+    The ``modify`` value (edit-and-retry) requires within-command
+    diffing of arg tokens, which crosses the PII boundary the engine
+    holds (only ``first_token_hash`` is retained per command). v0.1
+    therefore never emits ``modify``; v0.2 will once the PII trade-off
+    is revisited against a real attacker corpus.
+    """
+    errored = [(i, c) for i, c in enumerate(ctx.commands) if c.errored]
+    if not errored:
+        return
+    counts = {"rerun": 0, "switch": 0, "abort": 0}
+    for i, cmd in errored:
+        if i + 1 >= len(ctx.commands):
+            counts["abort"] += 1
+        elif ctx.commands[i + 1].first_token_hash == cmd.first_token_hash:
+            counts["rerun"] += 1
+        else:
+            counts["switch"] += 1
+    # Registry-order tiebreak (rerun > modify > switch > abort).
+    # `modify` deferred — never increments here.
+    order = ("rerun", "switch", "abort")
+    value = max(order, key=lambda k: counts[k])
+
+    if len(errored) < MIN_COMMANDS_FOR_FULL_CONFIDENCE:
+        confidence = 0.40
+    else:
+        confidence = 0.65
+    yield make_observation(
+        ctx,
+        primitive="cognitive.error_resilience.retry_tactic",
+        value=value,
+        confidence=confidence,
+    )
+
+
 def tool_vocabulary(ctx: SessionContext) -> Iterator[Observation]:
     """Emit ``cognitive.tool_vocabulary`` ∈ {narrow, moderate, broad}.
 
diff --git a/tests/profiler/behave_shell/test_cognitive_error_resilience_retry_tactic.py b/tests/profiler/behave_shell/test_cognitive_error_resilience_retry_tactic.py
new file mode 100644
index 00000000..d2909938
--- /dev/null
+++ b/tests/profiler/behave_shell/test_cognitive_error_resilience_retry_tactic.py
@@ -0,0 +1,85 @@
+"""Step D.5: ``cognitive.error_resilience.retry_tactic``."""
+from __future__ import annotations
+
+from decnet.profiler.behave_shell import extract_session
+from decnet.profiler.behave_shell._parse import AsciinemaEvent
+
+
+PRIMITIVE = "cognitive.error_resilience.retry_tactic"
+
+
+def _of(observations: list, primitive: str):
+    obs = [o for o in observations if o.primitive == primitive]
+    assert len(obs) == 1, f"expected exactly one {primitive}, got {len(obs)}"
+    return obs[0]
+
+
+def _typed(text: str, t0: float = 0.0, dt: float = 0.05) -> list[AsciinemaEvent]:
+    return [(t0 + i * dt, "i", c) for i, c in enumerate(text)]
+
+
+def _err_then(token: str, next_token: str | None, t0: float = 0.0) -> list[AsciinemaEvent]:
+    """``token`` errors; ``next_token`` is the operator's response (or None).
+
+    Output event lands after the ``\\r`` so it falls inside the
+    command's post-execution window.
+    """
+    events: list[AsciinemaEvent] = []
+    events.extend(_typed(f"{token}\r", t0=t0))
+    cmd_end = t0 + len(token) * 0.05  # \r is the last char
+    events.append((cmd_end + 0.10, "o", f"bash: {token}: command not found\n"))
+    if next_token is not None:
+        events.extend(_typed(f"{next_token}\r", t0=t0 + 1.5))
+    return events
+
+
+def test_no_errors_no_emission() -> None:
+    events: list[AsciinemaEvent] = _typed("ls\r") + [(0.5, "o", "file1\n")]
+    out = list(extract_session(events, sid="rt-noerr"))
+    assert [o for o in out if o.primitive == PRIMITIVE] == []
+
+
+def test_majority_rerun_emits_rerun() -> None:
+    """Operator re-invokes the same tool after each error → rerun."""
+    events: list[AsciinemaEvent] = []
+    for i in range(5):
+        events.extend(_err_then("foo", "foo", t0=i * 2.0))
+    out = list(extract_session(events, sid="rt-rerun"))
+    assert _of(out, PRIMITIVE).value == "rerun"
+
+
+def test_majority_switch_emits_switch() -> None:
+    """Operator pivots to a different tool after each error → switch."""
+    events: list[AsciinemaEvent] = []
+    for i in range(5):
+        events.extend(_err_then("foo", f"bar{i}", t0=i * 2.0))
+    out = list(extract_session(events, sid="rt-switch"))
+    assert _of(out, PRIMITIVE).value == "switch"
+
+
+def test_terminal_error_emits_abort() -> None:
+    """Single errored command at session end → abort (only candidate)."""
+    events = _err_then("foo", None, t0=0.0)
+    out = list(extract_session(events, sid="rt-abort"))
+    assert _of(out, PRIMITIVE).value == "abort"
+
+
+def test_low_error_count_reduces_confidence() -> None:
+    short_events: list[AsciinemaEvent] = []
+    for i in range(2):
+        short_events.extend(_err_then("foo", "foo", t0=i * 2.0))
+    full_events: list[AsciinemaEvent] = []
+    for i in range(6):
+        full_events.extend(_err_then("foo", "foo", t0=i * 2.0))
+    s = _of(list(extract_session(short_events, sid="rt-short")), PRIMITIVE)
+    f = _of(list(extract_session(full_events, sid="rt-full")), PRIMITIVE)
+    assert s.confidence < f.confidence
+
+
+def test_pii_no_command_bodies_in_observation() -> None:
+    events: list[AsciinemaEvent] = []
+    for i in range(5):
+        events.extend(_err_then("supersecret", "supersecret", t0=i * 2.0))
+    out = list(extract_session(events, sid="rt-pii"))
+    obs = _of(out, PRIMITIVE)
+    assert "supersecret" not in obs.model_dump_json()