diff --git a/decnet/profiler/behave_shell/_features/__init__.py b/decnet/profiler/behave_shell/_features/__init__.py
index ae5d721f..5a90323a 100644
--- a/decnet/profiler/behave_shell/_features/__init__.py
+++ b/decnet/profiler/behave_shell/_features/__init__.py
@@ -13,6 +13,7 @@ from decnet_behave_core.spec.envelope import Observation
 from decnet.profiler.behave_shell._ctx import SessionContext
 from decnet.profiler.behave_shell._features.cognitive import (
     command_branch_diversity,
+    feedback_loop_engagement,
     inter_command_latency_class,
 )
 from decnet.profiler.behave_shell._features.motor import (
@@ -27,4 +28,5 @@ FEATURES: tuple[FeatureFn, ...] = (
     paste_burst_rate,
     inter_command_latency_class,
     command_branch_diversity,
+    feedback_loop_engagement,
 )
diff --git a/decnet/profiler/behave_shell/_features/cognitive.py b/decnet/profiler/behave_shell/_features/cognitive.py
index 68581160..c5563489 100644
--- a/decnet/profiler/behave_shell/_features/cognitive.py
+++ b/decnet/profiler/behave_shell/_features/cognitive.py
@@ -16,6 +16,8 @@ from decnet.profiler.behave_shell._ctx import SessionContext
 from decnet.profiler.behave_shell._features._emit import make_observation
 from decnet.profiler.behave_shell._thresholds import (
     BRANCH_DIVERSITY_LINEAR_MIN,
+    FEEDBACK_CORRELATION_MIN,
+    FEEDBACK_MIN_PAIRS,
     INTER_CMD_DELIBERATE_MAX,
     INTER_CMD_INSTANT_MAX,
     INTER_CMD_LLM_HEAVYWEIGHT_MAX,
@@ -100,3 +102,61 @@ def command_branch_diversity(ctx: SessionContext) -> Iterator[Observation]:
         value=value,
         confidence=0.80,
     )
+
+
+def feedback_loop_engagement(ctx: SessionContext) -> Iterator[Observation]:
+    """Emit ``cognitive.feedback_loop_engagement``.
+
+    Pearson correlation between ``output_per_cmd[i]`` (bytes the
+    operator saw before the next command) and
+    ``inter_cmd_iats[i]`` (the pause that followed). closed_loop
+    operators read more before pausing more; fire_and_forget operators
+    pace independently of output. CUTS ACROSS the LLM/human axis —
+    closed-loop LLMs and reading humans both score closed_loop.
+
+    First primitive that depends on output events. Outcomes:
+
+    * zero output events in the shard, or fewer than
+      ``FEEDBACK_MIN_PAIRS`` pairs → ``unknown`` at confidence 1.0
+      (no honest correlation possible) — unless the shard has no
+      commands at all, in which case nothing is emitted;
+    * correlation undefined (``statistics.StatisticsError``, e.g. a
+      constant series on either axis) → ``unknown`` at confidence 1.0;
+    * ``r > FEEDBACK_CORRELATION_MIN`` → ``closed_loop`` at 0.75;
+    * anything else, including negative ``r`` → ``fire_and_forget``
+      at 0.75.
+    """
+    pairs = list(zip(ctx.output_per_cmd, ctx.inter_cmd_iats))
+    if not ctx.output_events or len(pairs) < FEEDBACK_MIN_PAIRS:
+        if not ctx.commands:
+            return
+        yield make_observation(
+            ctx,
+            primitive="cognitive.feedback_loop_engagement",
+            value="unknown",
+            confidence=1.0,
+        )
+        return
+    xs = [float(p[0]) for p in pairs]
+    ys = [float(p[1]) for p in pairs]
+    try:
+        r = statistics.correlation(xs, ys)
+    except statistics.StatisticsError:
+        # Constant series on either axis — correlation undefined.
+        yield make_observation(
+            ctx,
+            primitive="cognitive.feedback_loop_engagement",
+            value="unknown",
+            confidence=1.0,
+        )
+        return
+    if r > FEEDBACK_CORRELATION_MIN:
+        value = "closed_loop"
+    else:
+        value = "fire_and_forget"
+    yield make_observation(
+        ctx,
+        primitive="cognitive.feedback_loop_engagement",
+        value=value,
+        confidence=0.75,
+    )
diff --git a/tests/profiler/behave_shell/test_cognitive_feedback_loop_engagement.py b/tests/profiler/behave_shell/test_cognitive_feedback_loop_engagement.py
new file mode 100644
index 00000000..77cb1075
--- /dev/null
+++ b/tests/profiler/behave_shell/test_cognitive_feedback_loop_engagement.py
@@ -0,0 +1,118 @@
+"""Step 7: ``cognitive.feedback_loop_engagement``."""
+from __future__ import annotations
+
+from decnet.profiler.behave_shell import extract_session
+from decnet.profiler.behave_shell._parse import AsciinemaEvent
+
+
+def _of(observations: list, primitive: str):
+    obs = [o for o in observations if o.primitive == primitive]
+    assert len(obs) == 1, f"expected exactly one {primitive}, got {len(obs)}"
+    return obs[0]
+
+
+def _session_with_pairs(
+    output_byte_counts: list[int],
+    next_pauses: list[float],
+) -> list[AsciinemaEvent]:
+    """Build a session with N+1 commands, where the i-th (i in 0..N-1)
+    is followed by ``output_byte_counts[i]`` bytes of output, then a
+    pause of ``next_pauses[i]`` seconds, then the next command."""
+    assert len(output_byte_counts) == len(next_pauses)
+    events: list[AsciinemaEvent] = []
+    t = 0.0
+    for bytes_after, pause in zip(output_byte_counts, next_pauses):
+        # Issue command at t
+        events.append((t, "i", "x\r"))
+        # Emit one output event of the desired size shortly after
+        events.append((t + 0.01, "o", "y" * bytes_after))
+        # Next command starts after `pause`
+        t += pause
+    # Final terminating command
+    events.append((t, "i", "x\r"))
+    return events
+
+
+def test_no_output_events_emits_unknown() -> None:
+    # Only input, no output → unknown @ 1.0
+    events: list[AsciinemaEvent] = [(i * 1.0, "i", "x\r") for i in range(8)]
+    out = list(extract_session(events, sid="fb-no-output"))
+    obs = _of(out, "cognitive.feedback_loop_engagement")
+    assert obs.value == "unknown"
+    assert obs.confidence == 1.0
+
+
+def test_few_pairs_emits_unknown() -> None:
+    # 2 commands → 1 pair, below the min-pairs floor
+    events: list[AsciinemaEvent] = [
+        (0.0, "i", "x\r"),
+        (0.1, "o", "out"),
+        (1.0, "i", "x\r"),
+    ]
+    out = list(extract_session(events, sid="fb-few"))
+    obs = _of(out, "cognitive.feedback_loop_engagement")
+    assert obs.value == "unknown"
+
+
+def test_strong_positive_correlation_closed_loop() -> None:
+    # Larger output → longer pause: closed_loop
+    bytes_seen = [10, 100, 1000, 200, 50, 800]
+    pauses = [1.0, 5.0, 20.0, 6.0, 2.0, 18.0]
+    out = list(extract_session(
+        _session_with_pairs(bytes_seen, pauses),
+        sid="fb-closed",
+    ))
+    obs = _of(out, "cognitive.feedback_loop_engagement")
+    assert obs.value == "closed_loop"
+    assert obs.confidence == 0.75
+
+
+def test_constant_pace_not_closed_loop() -> None:
+    # Constant pace independent of output: never closed_loop
+    bytes_seen = [10, 1000, 50, 800, 5, 200]
+    pauses = [3.0, 3.0, 3.0, 3.0, 3.0, 3.0]
+    out = list(extract_session(
+        _session_with_pairs(bytes_seen, pauses),
+        sid="fb-fnf",
+    ))
+    obs = _of(out, "cognitive.feedback_loop_engagement")
+    # statistics.correlation raises on constant series; we map that
+    # to "unknown". A near-zero (non-constant) correlation maps to
+    # fire_and_forget. Either is correct here as long as it's NOT
+    # closed_loop.
+    assert obs.value in ("fire_and_forget", "unknown")
+
+
+def test_uncorrelated_pace_fire_and_forget() -> None:
+    # Pauses vary but do not track output size (r ≈ -0.08): below
+    # FEEDBACK_CORRELATION_MIN (0.30), so fire_and_forget
+    bytes_seen = [10, 1000, 50, 800, 5, 200]
+    pauses = [4.0, 3.0, 5.0, 5.0, 3.0, 4.0]
+    out = list(extract_session(
+        _session_with_pairs(bytes_seen, pauses),
+        sid="fb-uncorr",
+    ))
+    obs = _of(out, "cognitive.feedback_loop_engagement")
+    assert obs.value == "fire_and_forget"
+    assert obs.confidence == 0.75
+
+
+def test_negative_correlation_not_closed_loop() -> None:
+    # Big output, short pause / small output, long pause: negative r
+    bytes_seen = [10, 1000, 50, 800, 5, 200]
+    pauses = [20.0, 1.0, 18.0, 2.0, 22.0, 5.0]
+    out = list(extract_session(
+        _session_with_pairs(bytes_seen, pauses),
+        sid="fb-neg",
+    ))
+    obs = _of(out, "cognitive.feedback_loop_engagement")
+    # Negative r is below FEEDBACK_CORRELATION_MIN (0.30) so it
+    # belongs to the fire_and_forget bucket — closed_loop is reserved
+    # for r > +0.30.
+    assert obs.value == "fire_and_forget"
+
+
+def test_no_commands_no_emission() -> None:
+    # No commands at all → not emitted (no honest answer)
+    out = list(extract_session([(0.0, "o", "hi")], sid="fb-nocmd"))
+    assert [o for o in out if o.primitive == "cognitive.feedback_loop_engagement"] == []