feat(profiler/behave_shell): emit cognitive.error_resilience.fallback_to_man
For each errored command, check whether the next command's
first_token_hash is in {man, help, info} (precomputed at module
load). At least one match → present, else absent. The --help / -h
flag forms aren't first tokens; v0.2 will reconsider once arg-token
hashing is justified by corpus.
This commit is contained in:
@@ -14,6 +14,7 @@ from decnet.profiler.behave_shell._ctx import SessionContext
|
|||||||
from decnet.profiler.behave_shell._features.cognitive import (
|
from decnet.profiler.behave_shell._features.cognitive import (
|
||||||
cognitive_load,
|
cognitive_load,
|
||||||
command_branch_diversity,
|
command_branch_diversity,
|
||||||
|
error_resilience_fallback_to_man,
|
||||||
error_resilience_frustration_typing,
|
error_resilience_frustration_typing,
|
||||||
error_resilience_retry_tactic,
|
error_resilience_retry_tactic,
|
||||||
exploration_style,
|
exploration_style,
|
||||||
@@ -57,4 +58,5 @@ FEATURES: tuple[FeatureFn, ...] = (
|
|||||||
tool_vocabulary,
|
tool_vocabulary,
|
||||||
error_resilience_retry_tactic,
|
error_resilience_retry_tactic,
|
||||||
error_resilience_frustration_typing,
|
error_resilience_frustration_typing,
|
||||||
|
error_resilience_fallback_to_man,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ from decnet_behave_core.spec.envelope import Observation
|
|||||||
|
|
||||||
from decnet.profiler.behave_shell._ctx import SessionContext
|
from decnet.profiler.behave_shell._ctx import SessionContext
|
||||||
from decnet.profiler.behave_shell._features._emit import make_observation
|
from decnet.profiler.behave_shell._features._emit import make_observation
|
||||||
|
from decnet.profiler.behave_shell._parse import hash_token
|
||||||
from decnet.profiler.behave_shell._thresholds import (
|
from decnet.profiler.behave_shell._thresholds import (
|
||||||
BRANCH_DIVERSITY_LINEAR_MIN,
|
BRANCH_DIVERSITY_LINEAR_MIN,
|
||||||
COGNITIVE_LOAD_CHUNKING_REF_CV,
|
COGNITIVE_LOAD_CHUNKING_REF_CV,
|
||||||
@@ -43,6 +44,19 @@ from decnet.profiler.behave_shell._thresholds import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Built once, when the module is imported, so that the per-session hot
# loop only pays for a set-membership test instead of 3 sha256 ops per
# command. The ``--help`` / ``-h`` flag forms cannot be recognised
# here: they are not first tokens, and PII discipline keeps only the
# *first* token's hash. v0.2 will reconsider once corpus calibration
# justifies storing arg-token hashes too.
_HELP_FAMILY_HASHES: frozenset[str] = frozenset(
    hash_token(word) for word in ("man", "help", "info")
)
|
||||||
|
|
||||||
|
|
||||||
def _clip01(x: float) -> float:
|
def _clip01(x: float) -> float:
|
||||||
if x < 0.0:
|
if x < 0.0:
|
||||||
return 0.0
|
return 0.0
|
||||||
@@ -188,6 +202,46 @@ def feedback_loop_engagement(ctx: SessionContext) -> Iterator[Observation]:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def error_resilience_fallback_to_man(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.error_resilience.fallback_to_man``.

    Each errored command that is followed by another command is one
    piece of evidence: did the operator's *next* command start with
    ``man`` / ``help`` / ``info`` — i.e. did they reach for the manual
    rather than re-trying or pivoting? If at least one errored command
    triggered this fallback → ``present``; otherwise ``absent``.

    Nothing is emitted when there is no evidence to judge: either no
    command errored, or the only errored command is the last command
    of the session and so has no follow-up to inspect. The registry's
    binary has no ``unknown``, and emitting ``absent`` from no
    observation at all would be dishonest.

    Confidence is 0.40 when fewer than
    ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` error/follow-up pairs were
    seen, and 0.65 otherwise. A trailing errored command without a
    follow-up does not count towards that threshold.

    The ``--help`` / ``-h`` flag forms can't fire this primitive in
    v0.1: they aren't first tokens, and the engine only retains
    ``first_token_hash`` per command (PII discipline). Filed for v0.2.
    """
    commands = ctx.commands

    # Collect the command that follows each errored command. Stopping
    # one short of the end drops a trailing errored command, which has
    # no successor and therefore carries no evidence either way.
    followups = [
        commands[i + 1]
        for i in range(len(commands) - 1)
        if commands[i].errored
    ]
    if not followups:
        return

    fell_back = any(
        nxt.first_token_hash in _HELP_FAMILY_HASHES for nxt in followups
    )
    value = "present" if fell_back else "absent"

    # Few error/follow-up pairs means a weak signal, so use the lower tier.
    if len(followups) < MIN_COMMANDS_FOR_FULL_CONFIDENCE:
        confidence = 0.40
    else:
        confidence = 0.65

    yield make_observation(
        ctx,
        primitive="cognitive.error_resilience.fallback_to_man",
        value=value,
        confidence=confidence,
    )
|
||||||
|
|
||||||
|
|
||||||
def error_resilience_frustration_typing(ctx: SessionContext) -> Iterator[Observation]:
|
def error_resilience_frustration_typing(ctx: SessionContext) -> Iterator[Observation]:
|
||||||
"""Emit ``cognitive.error_resilience.frustration_typing``.
|
"""Emit ``cognitive.error_resilience.frustration_typing``.
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,72 @@
|
|||||||
|
"""Step D.7: ``cognitive.error_resilience.fallback_to_man``."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from decnet.profiler.behave_shell import extract_session
|
||||||
|
from decnet.profiler.behave_shell._parse import AsciinemaEvent
|
||||||
|
|
||||||
|
|
||||||
|
PRIMITIVE = "cognitive.error_resilience.fallback_to_man"
|
||||||
|
|
||||||
|
|
||||||
|
def _of(observations: list, primitive: str):
|
||||||
|
obs = [o for o in observations if o.primitive == primitive]
|
||||||
|
assert len(obs) == 1, f"expected exactly one {primitive}, got {len(obs)}"
|
||||||
|
return obs[0]
|
||||||
|
|
||||||
|
|
||||||
|
def _typed(text: str, t0: float, dt: float = 0.05) -> list[AsciinemaEvent]:
|
||||||
|
return [(t0 + i * dt, "i", c) for i, c in enumerate(text)]
|
||||||
|
|
||||||
|
|
||||||
|
def _err_then(token: str, response: str, t0: float) -> list[AsciinemaEvent]:
    """Build events for typing *token*, a bash error, then typing *response*.

    *token* plus a carriage return is typed starting at *t0*. A
    ``command not found`` output event follows 0.10 after the carriage
    return, and *response* plus a carriage return is typed starting at
    ``t0 + 1.5``.
    """
    failed_keys = _typed(f"{token}\r", t0=t0)
    # The carriage return is the keystroke at index ``len(token)``, typed
    # at the helper's default 0.05 spacing.
    enter_time = t0 + len(token) * 0.05
    error_output = (enter_time + 0.10, "o", f"bash: {token}: command not found\n")
    followup_keys = _typed(f"{response}\r", t0=t0 + 1.5)
    return [*failed_keys, error_output, *followup_keys]
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_errors_no_emission() -> None:
    """A lone ``ls`` with ordinary output yields no observation for PRIMITIVE."""
    keystrokes = _typed("ls\r", t0=0.0)
    listing = (0.5, "o", "file1\n")
    observations = list(extract_session([*keystrokes, listing], sid="ftm-clean"))
    emitted = [o for o in observations if o.primitive == PRIMITIVE]
    assert emitted == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_help_after_error_emits_present() -> None:
    """At least one fallback to a help-family token → present."""
    # Five rounds of a failing ``foo`` followed by ``man``, 3 s apart.
    events: list[AsciinemaEvent] = [
        event
        for round_no in range(5)
        for event in _err_then("foo", "man", t0=round_no * 3.0)
    ]
    observations = list(extract_session(events, sid="ftm-present"))
    found = _of(observations, PRIMITIVE)
    assert found.value == "present"
|
||||||
|
|
||||||
|
|
||||||
|
def test_pivot_unrelated_emits_absent() -> None:
    """Errors followed by non-help tools → absent."""
    # Five rounds of a failing ``foo`` followed by ``ls``, 3 s apart.
    events: list[AsciinemaEvent] = [
        event
        for round_no in range(5)
        for event in _err_then("foo", "ls", t0=round_no * 3.0)
    ]
    observations = list(extract_session(events, sid="ftm-absent"))
    found = _of(observations, PRIMITIVE)
    assert found.value == "absent"
|
||||||
|
|
||||||
|
|
||||||
|
def test_info_token_also_counts() -> None:
    """A failing command followed by ``info`` is reported as present."""
    events: list[AsciinemaEvent] = [
        event
        for round_no in range(5)
        for event in _err_then("foo", "info", t0=round_no * 3.0)
    ]
    observations = list(extract_session(events, sid="ftm-info"))
    found = _of(observations, PRIMITIVE)
    assert found.value == "present"
|
||||||
|
|
||||||
|
|
||||||
|
def test_low_error_count_reduces_confidence() -> None:
    """Two error/``man`` rounds must score lower confidence than six."""

    def _rounds(count: int) -> list[AsciinemaEvent]:
        # ``count`` rounds of a failing ``foo`` followed by ``man``, 3 s apart.
        return [
            event
            for round_no in range(count)
            for event in _err_then("foo", "man", t0=round_no * 3.0)
        ]

    short_events = _rounds(2)
    full_events = _rounds(6)
    sparse = _of(list(extract_session(short_events, sid="ftm-short")), PRIMITIVE)
    dense = _of(list(extract_session(full_events, sid="ftm-full")), PRIMITIVE)
    assert sparse.confidence < dense.confidence
|
||||||
Reference in New Issue
Block a user