From f948e10830f48c0d2b82955dc14f6d16da26764c Mon Sep 17 00:00:00 2001 From: anti Date: Sun, 3 May 2026 23:52:29 -0400 Subject: [PATCH] feat(profiler/behave_shell): emit cognitive.cognitive_load Composite over three [0, 1]-clipped sub-signals (chunking variance, error rate from D.0's Command.errored, pace variability), mean-aggregated and bucketed against COGNITIVE_LOAD_LOW_MAX / COGNITIVE_LOAD_MEDIUM_MAX. Components missing data drop out of the mean rather than zeroing it. v0.1 thresholds; D.8 re-tunes once D.2-D.7 are stable. Confidence held at 0.60 (composite over soft sub-signals) and lowered to 0.40 below the 5-command sample-size floor. --- .../behave_shell/_features/__init__.py | 2 + .../behave_shell/_features/cognitive.py | 97 +++++++++++++++++++ decnet/profiler/behave_shell/_thresholds.py | 21 ++++ .../test_cognitive_cognitive_load.py | 88 +++++++++++++++++ 4 files changed, 208 insertions(+) create mode 100644 tests/profiler/behave_shell/test_cognitive_cognitive_load.py diff --git a/decnet/profiler/behave_shell/_features/__init__.py b/decnet/profiler/behave_shell/_features/__init__.py index 471eb3cf..33132d6c 100644 --- a/decnet/profiler/behave_shell/_features/__init__.py +++ b/decnet/profiler/behave_shell/_features/__init__.py @@ -12,6 +12,7 @@ from decnet_behave_core.spec.envelope import Observation from decnet.profiler.behave_shell._ctx import SessionContext from decnet.profiler.behave_shell._features.cognitive import ( + cognitive_load, command_branch_diversity, feedback_loop_engagement, inter_command_consistency, @@ -45,4 +46,5 @@ FEATURES: tuple[FeatureFn, ...] 
def _clip01(x: float) -> float:
    """Clamp ``x`` into the closed interval [0.0, 1.0]."""
    if x < 0.0:
        return 0.0
    if x > 1.0:
        return 1.0
    return x


def _cv(xs: tuple[float, ...] | list[float]) -> float | None:
    """Return the coefficient of variation (sample stdev / mean) of ``xs``.

    Returns ``None`` when the ratio is undefined: fewer than two samples,
    or a non-positive mean.
    """
    if len(xs) < 2:
        return None
    mean = statistics.fmean(xs)
    if mean <= 0.0:
        return None
    return statistics.stdev(xs) / mean


def _bucket_cognitive_load(load: float) -> str:
    """Map a composite load score onto ``low`` / ``medium`` / ``high``.

    Both boundaries are exclusive upper bounds: a score equal to
    ``COGNITIVE_LOAD_LOW_MAX`` is already ``medium`` and a score equal to
    ``COGNITIVE_LOAD_MEDIUM_MAX`` is already ``high``.
    """
    if load < COGNITIVE_LOAD_LOW_MAX:
        return "low"
    if load < COGNITIVE_LOAD_MEDIUM_MAX:
        return "medium"
    return "high"


def cognitive_load(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.cognitive_load`` ∈ {low, medium, high}.

    Composite of up to three [0, 1] sub-signals, mean-aggregated:

    * **chunking** — median CV of intra-command IATs / reference CV.
      Fragmented mid-command typing → high contribution.
    * **errors** — fraction of commands whose ``errored`` flag is set.
      Failures pile load.
    * **pace variability** — CV of inter-command IATs / reference CV.
      A spread of think-pause durations → unsettled cadence → load.

    A component whose CV is undefined (see ``_cv``) is *excluded* from
    the mean rather than counted as 0.0, so the composite is normalised
    by the number of components that actually have data. The error-rate
    component is always defined once at least one command exists, so the
    mean never runs over an empty set.

    Nothing is emitted when the session has no commands at all — there
    is no honest answer.

    Confidence is 0.40 below ``MIN_COMMANDS_FOR_FULL_CONFIDENCE``
    commands and 0.60 otherwise.

    v0.1 thresholds; D.8 re-tunes once the rest of Phase D is stable.
    """
    if not ctx.commands:
        return

    # Component A: chunking variance — median of the per-command CVs,
    # ignoring commands whose CV is undefined.
    per_cmd_cvs = [
        cv for cv in map(_cv, ctx.intra_command_iats) if cv is not None
    ]
    chunking_load: float | None = (
        _clip01(statistics.median(per_cmd_cvs) / COGNITIVE_LOAD_CHUNKING_REF_CV)
        if per_cmd_cvs
        else None
    )

    # Component B: error rate. A ratio of counts over a non-empty
    # denominator is already inside [0, 1], so no clipping is required.
    error_load: float = (
        sum(1 for c in ctx.commands if c.errored) / len(ctx.commands)
    )

    # Component C: pace variability — CV of inter-command IATs.
    pace_cv = _cv(ctx.inter_cmd_iats)
    pace_load: float | None = (
        None if pace_cv is None else _clip01(pace_cv / COGNITIVE_LOAD_PACE_REF_CV)
    )

    # ``error_load`` is never None, so ``components`` is never empty.
    components = [
        c for c in (chunking_load, error_load, pace_load) if c is not None
    ]
    load = sum(components) / len(components)

    if len(ctx.commands) < MIN_COMMANDS_FOR_FULL_CONFIDENCE:
        confidence = 0.40
    else:
        # Composite over three soft sub-signals — held below the
        # cap of single-source primitives. D.8 re-tunes.
        confidence = 0.60
    yield make_observation(
        ctx,
        primitive="cognitive.cognitive_load",
        value=_bucket_cognitive_load(load),
        confidence=confidence,
    )
def _of(observations: list, primitive: str):
    """Return the single observation for ``primitive``; fail if not exactly one."""
    obs = [o for o in observations if o.primitive == primitive]
    assert len(obs) == 1, f"expected exactly one {primitive}, got {len(obs)}"
    return obs[0]


def _typed(text: str, t0: float = 0.0, dt: float = 0.05) -> list[AsciinemaEvent]:
    """Build one input (``"i"``) event per character, spaced ``dt`` apart from ``t0``."""
    return [(t0 + i * dt, "i", c) for i, c in enumerate(text)]


def _metronomic_clean_session(n: int = 8) -> list[AsciinemaEvent]:
    """``n`` commands, perfectly even pacing, zero errors, fluent typing."""
    events: list[AsciinemaEvent] = []
    for i in range(n):
        events.extend(_typed("ls\r", t0=i * 1.0, dt=0.05))
    return events


def test_no_commands_no_emission() -> None:
    """A lone keystroke with no submitted command yields no observation."""
    events: list[AsciinemaEvent] = [(0.0, "i", "a")]
    out = list(extract_session(events, sid="cl-empty"))
    assert [o for o in out if o.primitive == "cognitive.cognitive_load"] == []


def test_metronomic_clean_session_emits_low() -> None:
    """Even pacing + clean output + steady typing → low load."""
    out = list(extract_session(_metronomic_clean_session(8), sid="cl-low"))
    obs = _of(out, "cognitive.cognitive_load")
    assert obs.value == "low"


def test_high_error_rate_drives_load_up() -> None:
    """Every command errored — error_load = 1.0 alone forces load >= 0.33."""
    events: list[AsciinemaEvent] = []
    for i in range(8):
        events.extend(_typed("foo\r", t0=i * 1.0, dt=0.05))
        events.append((i * 1.0 + 0.5, "o", "bash: foo: command not found\n"))
    out = list(extract_session(events, sid="cl-err"))
    obs = _of(out, "cognitive.cognitive_load")
    assert obs.value in ("medium", "high")


def test_all_three_components_high_emits_high() -> None:
    """Saturate every component → load ≈ 1.0 → high."""
    events: list[AsciinemaEvent] = []
    # Burst-then-gap pacing maximises pace-CV; mid-command jitter
    # maximises chunking-CV; every command errors.
    starts = [0.0, 0.1, 0.2, 30.0, 30.1, 60.0, 90.0, 90.1]
    for s in starts:
        # Mid-command jitter: 'a' at s, 'b' 0.01s later, 'c' 2s later, '\r' 2.05s later
        events.append((s, "i", "a"))
        events.append((s + 0.01, "i", "b"))
        events.append((s + 2.0, "i", "c"))
        events.append((s + 2.05, "i", "\r"))
        events.append((s + 2.10, "o", "bash: abc: command not found\n"))
    out = list(extract_session(events, sid="cl-high"))
    obs = _of(out, "cognitive.cognitive_load")
    assert obs.value == "high"


def test_low_sample_count_reduces_confidence() -> None:
    """A 3-command session must report lower confidence than an 8-command one."""
    short = list(extract_session(_metronomic_clean_session(3), sid="cl-short"))
    full = list(extract_session(_metronomic_clean_session(8), sid="cl-full"))
    s = _of(short, "cognitive.cognitive_load")
    f = _of(full, "cognitive.cognitive_load")
    assert s.confidence < f.confidence


def test_pii_no_command_bodies_in_observation() -> None:
    """The serialised observation must not contain the typed command text."""
    events: list[AsciinemaEvent] = []
    for i in range(6):
        events.extend(_typed("supersecret\r", t0=i * 1.0, dt=0.05))
    out = list(extract_session(events, sid="cl-pii"))
    obs = _of(out, "cognitive.cognitive_load")
    assert "supersecret" not in obs.model_dump_json()