feat(profiler/behave_shell): emit cognitive.error_resilience.retry_tactic
Emit the modal response across commands with Command.errored=True:
  * same first_token_hash on the next command → rerun
  * different first_token_hash on the next command → switch
  * no next command → abort
Ties are broken in registry order. The fourth registry value, 'modify', requires within-command arg diffing (which crosses the PII boundary); it is deferred to v0.2.
This commit is contained in:
@@ -14,6 +14,7 @@ from decnet.profiler.behave_shell._ctx import SessionContext
|
|||||||
from decnet.profiler.behave_shell._features.cognitive import (
|
from decnet.profiler.behave_shell._features.cognitive import (
|
||||||
cognitive_load,
|
cognitive_load,
|
||||||
command_branch_diversity,
|
command_branch_diversity,
|
||||||
|
error_resilience_retry_tactic,
|
||||||
exploration_style,
|
exploration_style,
|
||||||
feedback_loop_engagement,
|
feedback_loop_engagement,
|
||||||
planning_depth,
|
planning_depth,
|
||||||
@@ -53,4 +54,5 @@ FEATURES: tuple[FeatureFn, ...] = (
|
|||||||
exploration_style,
|
exploration_style,
|
||||||
planning_depth,
|
planning_depth,
|
||||||
tool_vocabulary,
|
tool_vocabulary,
|
||||||
|
error_resilience_retry_tactic,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -186,6 +186,59 @@ def feedback_loop_engagement(ctx: SessionContext) -> Iterator[Observation]:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def error_resilience_retry_tactic(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.error_resilience.retry_tactic``.

    Every command flagged ``errored`` is classified by what the operator
    did immediately afterwards:

    * **rerun** — the following command has the same
      ``first_token_hash``: the same tool was invoked again (possibly
      with corrected args, which are not visible here).
    * **switch** — the following command has a different
      ``first_token_hash``: the operator moved to another tool.
    * **abort** — there is no following command: the session ended on
      the error.

    The emitted value is the **modal** classification over all errored
    commands; ties resolve in registry order (rerun > modify > switch >
    abort). Nothing is emitted when no command errored, because the
    registry has no ``unknown`` value for this primitive.

    ``modify`` (edit-and-retry) is never produced in v0.1: detecting it
    needs a diff of arg tokens within a command, and only
    ``first_token_hash`` is retained per command (PII boundary). It is
    deferred to v0.2.

    Confidence is 0.40 when fewer than
    ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` commands errored, else 0.65.
    """
    commands = ctx.commands
    failure_positions = [pos for pos, cmd in enumerate(commands) if cmd.errored]
    if not failure_positions:
        return

    # Keys are inserted in registry tiebreak order (``modify`` is deferred,
    # so it has no slot). ``max`` returns the first maximal key it meets,
    # which makes insertion order the tiebreak.
    tally = dict.fromkeys(("rerun", "switch", "abort"), 0)
    final_position = len(commands) - 1
    for pos in failure_positions:
        if pos >= final_position:
            tactic = "abort"
        elif commands[pos + 1].first_token_hash == commands[pos].first_token_hash:
            tactic = "rerun"
        else:
            tactic = "switch"
        tally[tactic] += 1
    value = max(tally, key=tally.__getitem__)

    few_errors = len(failure_positions) < MIN_COMMANDS_FOR_FULL_CONFIDENCE
    confidence = 0.40 if few_errors else 0.65
    yield make_observation(
        ctx,
        primitive="cognitive.error_resilience.retry_tactic",
        value=value,
        confidence=confidence,
    )
|
||||||
|
|
||||||
|
|
||||||
def tool_vocabulary(ctx: SessionContext) -> Iterator[Observation]:
|
def tool_vocabulary(ctx: SessionContext) -> Iterator[Observation]:
|
||||||
"""Emit ``cognitive.tool_vocabulary`` ∈ {narrow, moderate, broad}.
|
"""Emit ``cognitive.tool_vocabulary`` ∈ {narrow, moderate, broad}.
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,85 @@
|
|||||||
|
"""Step D.5: ``cognitive.error_resilience.retry_tactic``."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from decnet.profiler.behave_shell import extract_session
|
||||||
|
from decnet.profiler.behave_shell._parse import AsciinemaEvent
|
||||||
|
|
||||||
|
|
||||||
|
PRIMITIVE = "cognitive.error_resilience.retry_tactic"
|
||||||
|
|
||||||
|
|
||||||
|
def _of(observations: list, primitive: str):
    """Return the single observation in ``observations`` named ``primitive``.

    Fails the calling test with an ``AssertionError`` unless exactly one
    observation carries that primitive name.
    """
    matches = [candidate for candidate in observations if candidate.primitive == primitive]
    assert len(matches) == 1, f"expected exactly one {primitive}, got {len(matches)}"
    (only,) = matches
    return only
|
||||||
|
|
||||||
|
|
||||||
|
def _typed(text: str, t0: float = 0.0, dt: float = 0.05) -> list[AsciinemaEvent]:
    """Build one input (``"i"``) event per character of ``text``.

    The first character is stamped ``t0``; each later one is ``dt``
    seconds after its predecessor's nominal slot (``t0 + index * dt``).
    """
    keystrokes: list[AsciinemaEvent] = []
    for index, char in enumerate(text):
        keystrokes.append((t0 + index * dt, "i", char))
    return keystrokes
|
||||||
|
|
||||||
|
|
||||||
|
def _err_then(token: str, next_token: str | None, t0: float = 0.0) -> list[AsciinemaEvent]:
    """Build events for a failing ``token`` and the operator's reaction.

    ``token`` is typed starting at ``t0`` and followed by a carriage
    return; a ``command not found`` output event is placed 0.10 s after
    that carriage return so it lands in the command's post-execution
    window. When ``next_token`` is not ``None`` it is typed (with its
    own carriage return) starting at ``t0 + 1.5``; ``None`` means the
    operator issued nothing further.
    """
    # The carriage return is keystroke number len(token) at 0.05 s spacing,
    # so this is the timestamp at which the command is submitted.
    submitted_at = t0 + len(token) * 0.05
    failure_output: AsciinemaEvent = (
        submitted_at + 0.10,
        "o",
        f"bash: {token}: command not found\n",
    )
    timeline: list[AsciinemaEvent] = [*_typed(f"{token}\r", t0=t0), failure_output]
    if next_token is None:
        return timeline
    return timeline + _typed(f"{next_token}\r", t0=t0 + 1.5)
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_errors_no_emission() -> None:
    """A session made of one ``ls`` command and plain output emits nothing."""
    events: list[AsciinemaEvent] = [*_typed("ls\r"), (0.5, "o", "file1\n")]
    observations = list(extract_session(events, sid="rt-noerr"))
    assert not any(o.primitive == PRIMITIVE for o in observations)
|
||||||
|
|
||||||
|
|
||||||
|
def test_majority_rerun_emits_rerun() -> None:
    """Re-invoking the same tool after every error yields ``rerun``."""
    events: list[AsciinemaEvent] = [
        event
        for attempt in range(5)
        for event in _err_then("foo", "foo", t0=attempt * 2.0)
    ]
    observations = list(extract_session(events, sid="rt-rerun"))
    assert _of(observations, PRIMITIVE).value == "rerun"
|
||||||
|
|
||||||
|
|
||||||
|
def test_majority_switch_emits_switch() -> None:
    """Pivoting to a different tool after every error yields ``switch``."""
    events: list[AsciinemaEvent] = [
        event
        for attempt in range(5)
        for event in _err_then("foo", f"bar{attempt}", t0=attempt * 2.0)
    ]
    observations = list(extract_session(events, sid="rt-switch"))
    assert _of(observations, PRIMITIVE).value == "switch"
|
||||||
|
|
||||||
|
|
||||||
|
def test_terminal_error_emits_abort() -> None:
    """One errored command with nothing after it yields ``abort``."""
    observations = list(
        extract_session(_err_then("foo", None, t0=0.0), sid="rt-abort")
    )
    tactic = _of(observations, PRIMITIVE)
    assert tactic.value == "abort"
|
||||||
|
|
||||||
|
|
||||||
|
def test_low_error_count_reduces_confidence() -> None:
    """Two error/rerun pairs must score lower confidence than six."""

    def _rerun_pairs(pairs: int) -> list[AsciinemaEvent]:
        # ``pairs`` repetitions of "foo errors, foo is typed again", 2 s apart.
        return [
            event
            for attempt in range(pairs)
            for event in _err_then("foo", "foo", t0=attempt * 2.0)
        ]

    short_events = _rerun_pairs(2)
    full_events = _rerun_pairs(6)
    sparse = _of(list(extract_session(short_events, sid="rt-short")), PRIMITIVE)
    dense = _of(list(extract_session(full_events, sid="rt-full")), PRIMITIVE)
    assert sparse.confidence < dense.confidence
|
||||||
|
|
||||||
|
|
||||||
|
def test_pii_no_command_bodies_in_observation() -> None:
    """The serialized observation must not contain raw command text.

    Five error/rerun pairs are typed using the token ``supersecret``;
    the JSON dump of the emitted observation is then checked for that
    literal.
    """
    # "supersecret\r" is 12 keystrokes at 0.05 s, so the response typed at
    # t0 + 1.5 finishes at t0 + 2.05. A 2.0 s stride would start the next
    # pair before the previous one ends, giving overlapping, non-monotonic
    # timestamps; 2.5 s keeps the recording chronologically ordered.
    stride = 2.5
    events: list[AsciinemaEvent] = []
    for i in range(5):
        events.extend(_err_then("supersecret", "supersecret", t0=i * stride))
    out = list(extract_session(events, sid="rt-pii"))
    obs = _of(out, PRIMITIVE)
    assert "supersecret" not in obs.model_dump_json()
|
||||||
Reference in New Issue
Block a user