feat(profiler/behave_shell): emit cognitive.feedback_loop_engagement

BEHAVE-EXTRACTOR.md Phase A Step 7. The orthogonal axis — does the operator's pause after a command correlate with the bytes of output they just saw? Splits HUMAN/CLAUDE-CL (closed_loop) from LW-sim/CLAUDE-FF (fire_and_forget); cuts ACROSS the LLM/human axis. * _features/cognitive.py:feedback_loop_engagement(ctx) emits one Observation in {closed_loop, fire_and_forget, unknown}. * Pearson correlation between ctx.output_per_cmd[i] and ctx.inter_cmd_iats[i] (paired by construction in Step 4), computed via statistics.correlation; an undefined correlation (constant series on either axis) falls back to "unknown". * r > FEEDBACK_CORRELATION_MIN (0.30) → closed_loop; otherwise (r ≤ 0.30, including zero and negative) → fire_and_forget. * First primitive that depends on output events: zero output events in the shard or fewer than FEEDBACK_MIN_PAIRS (5) pairs → emit "unknown" at confidence 1.0 (the absence of data is itself a high-confidence answer). A zero-command session emits nothing. Tests: no-output → unknown, few-pairs → unknown, strong positive r → closed_loop, constant pace → fire_and_forget/unknown (never closed_loop), negative r → fire_and_forget, no commands → no emission.
2026-05-03 07:55:38 -04:00
parent 3fc6ea5f75
commit 2f8c107e70
3 changed files with 159 additions and 0 deletions
--- a/decnet/profiler/behave_shell/_features/init.py
+++ b/decnet/profiler/behave_shell/_features/init.py
@@ -13,6 +13,7 @@ from decnet_behave_core.spec.envelope import Observation
 from decnet.profiler.behave_shell._ctx import SessionContext
 from decnet.profiler.behave_shell._features.cognitive import (
    command_branch_diversity,
+    feedback_loop_engagement,
    inter_command_latency_class,
 )
 from decnet.profiler.behave_shell._features.motor import (
@@ -27,4 +28,5 @@ FEATURES: tuple[FeatureFn, ...] = (
    paste_burst_rate,
    inter_command_latency_class,
    command_branch_diversity,
+    feedback_loop_engagement,
 )
--- a/decnet/profiler/behave_shell/_features/cognitive.py
+++ b/decnet/profiler/behave_shell/_features/cognitive.py
@@ -16,6 +16,8 @@ from decnet.profiler.behave_shell._ctx import SessionContext
 from decnet.profiler.behave_shell._features._emit import make_observation
 from decnet.profiler.behave_shell._thresholds import (
    BRANCH_DIVERSITY_LINEAR_MIN,
+    FEEDBACK_CORRELATION_MIN,
+    FEEDBACK_MIN_PAIRS,
    INTER_CMD_DELIBERATE_MAX,
    INTER_CMD_INSTANT_MAX,
    INTER_CMD_LLM_HEAVYWEIGHT_MAX,
@@ -100,3 +102,53 @@ def command_branch_diversity(ctx: SessionContext) -> Iterator[Observation]:
        value=value,
        confidence=0.80,
    )
+
+
+def feedback_loop_engagement(ctx: SessionContext) -> Iterator[Observation]:
+    """Emit ``cognitive.feedback_loop_engagement``.
+
+    Pearson correlation between ``output_per_cmd[i]`` (bytes the
+    operator saw before the next command) and
+    ``inter_cmd_iats[i]`` (the pause that followed). closed_loop
+    operators read more before pausing more; fire_and_forget operators
+    pace independently of output. CUTS ACROSS the LLM/human axis —
+    closed-loop LLMs and reading humans both score closed_loop.
+
+    First primitive that depends on output events: with no output
+    events, fewer than ``FEEDBACK_MIN_PAIRS`` pairs, or a constant series
+    no honest correlation exists → ``unknown`` at confidence 1.0 and exit.
+    """
+    pairs = list(zip(ctx.output_per_cmd, ctx.inter_cmd_iats))
+    if not ctx.output_events or len(pairs) < FEEDBACK_MIN_PAIRS:
+        if not ctx.commands:
+            return
+        yield make_observation(
+            ctx,
+            primitive="cognitive.feedback_loop_engagement",
+            value="unknown",
+            confidence=1.0,
+        )
+        return
+    xs = [float(p[0]) for p in pairs]
+    ys = [float(p[1]) for p in pairs]
+    try:
+        r = statistics.correlation(xs, ys)
+    except statistics.StatisticsError:
+        # Constant series on either axis — correlation undefined.
+        yield make_observation(
+            ctx,
+            primitive="cognitive.feedback_loop_engagement",
+            value="unknown",
+            confidence=1.0,
+        )
+        return
+    if r > FEEDBACK_CORRELATION_MIN:
+        value = "closed_loop"
+    else:
+        value = "fire_and_forget"
+    yield make_observation(
+        ctx,
+        primitive="cognitive.feedback_loop_engagement",
+        value=value,
+        confidence=0.75,
+    )
--- a/tests/profiler/behave_shell/test_cognitive_feedback_loop_engagement.py
+++ b/tests/profiler/behave_shell/test_cognitive_feedback_loop_engagement.py
@@ -0,0 +1,105 @@
+"""Step 7: ``cognitive.feedback_loop_engagement``."""
+from __future__ import annotations
+
+from decnet.profiler.behave_shell import extract_session
+from decnet.profiler.behave_shell._parse import AsciinemaEvent
+
+
+def _of(observations: list, primitive: str):
+    obs = [o for o in observations if o.primitive == primitive]
+    assert len(obs) == 1, f"expected exactly one {primitive}, got {len(obs)}"
+    return obs[0]
+
+
+def _session_with_pairs(
+    output_byte_counts: list[int],
+    next_pauses: list[float],
+) -> list[AsciinemaEvent]:
+    """Build a session with N+1 commands, where the i-th (i in 0..N-1)
+    is followed by ``output_byte_counts[i]`` bytes of output, then a
+    pause of ``next_pauses[i]`` seconds, then the next command."""
+    assert len(output_byte_counts) == len(next_pauses)
+    events: list[AsciinemaEvent] = []
+    t = 0.0
+    for bytes_after, pause in zip(output_byte_counts, next_pauses):
+        # Issue command at t
+        events.append((t, "i", "x\r"))
+        # Emit one output event of the desired size shortly after
+        events.append((t + 0.01, "o", "y" * bytes_after))
+        # Next command starts after `pause`
+        t += pause
+    # Final terminating command
+    events.append((t, "i", "x\r"))
+    return events
+
+
+def test_no_output_events_emits_unknown() -> None:
+    # Only input, no output → unknown @ 1.0
+    events: list[AsciinemaEvent] = [(i * 1.0, "i", "x\r") for i in range(8)]
+    out = list(extract_session(events, sid="fb-no-output"))
+    obs = _of(out, "cognitive.feedback_loop_engagement")
+    assert obs.value == "unknown"
+    assert obs.confidence == 1.0
+
+
+def test_few_pairs_emits_unknown() -> None:
+    # 2 commands → 1 pair, below the min-pairs floor
+    events: list[AsciinemaEvent] = [
+        (0.0, "i", "x\r"),
+        (0.1, "o", "out"),
+        (1.0, "i", "x\r"),
+    ]
+    out = list(extract_session(events, sid="fb-few"))
+    obs = _of(out, "cognitive.feedback_loop_engagement")
+    assert obs.value == "unknown"
+
+
+def test_strong_positive_correlation_closed_loop() -> None:
+    # Larger output → longer pause: closed_loop
+    bytes_seen = [10, 100, 1000, 200, 50, 800]
+    pauses    = [1.0, 5.0, 20.0, 6.0, 2.0, 18.0]
+    out = list(extract_session(
+        _session_with_pairs(bytes_seen, pauses),
+        sid="fb-closed",
+    ))
+    obs = _of(out, "cognitive.feedback_loop_engagement")
+    assert obs.value == "closed_loop"
+    assert obs.confidence == 0.75
+
+
+def test_zero_correlation_fire_and_forget() -> None:
+    # Constant pace independent of output: must never be closed_loop
+    bytes_seen = [10, 1000, 50, 800, 5, 200]
+    pauses    = [3.0, 3.0, 3.0, 3.0, 3.0, 3.0]
+    out = list(extract_session(
+        _session_with_pairs(bytes_seen, pauses),
+        sid="fb-fnf",
+    ))
+    obs = _of(out, "cognitive.feedback_loop_engagement")
+    # statistics.correlation raises on constant series; we map that
+    # to "unknown". A near-zero (non-constant) correlation maps to
+    # fire_and_forget. Either is correct here as long as it's NOT
+    # closed_loop.
+    assert obs.value in ("fire_and_forget", "unknown")
+    assert obs.value != "closed_loop"
+
+
+def test_negative_correlation_not_closed_loop() -> None:
+    # Big output, short pause / small output, long pause: negative r
+    bytes_seen = [10, 1000, 50, 800, 5, 200]
+    pauses    = [20.0, 1.0, 18.0, 2.0, 22.0, 5.0]
+    out = list(extract_session(
+        _session_with_pairs(bytes_seen, pauses),
+        sid="fb-neg",
+    ))
+    obs = _of(out, "cognitive.feedback_loop_engagement")
+    # Negative r is below FEEDBACK_CORRELATION_MIN (0.30) so it
+    # belongs to the fire_and_forget bucket — closed_loop is reserved
+    # for r > +0.30.
+    assert obs.value == "fire_and_forget"
+
+
+def test_no_commands_no_emission() -> None:
+    # No commands at all → not emitted (no honest answer)
+    out = list(extract_session([(0.0, "o", "hi")], sid="fb-nocmd"))
+    assert [o for o in out if o.primitive == "cognitive.feedback_loop_engagement"] == []