feat(profiler/behave_shell): emit cognitive.cognitive_load
Composite over three [0, 1]-clipped sub-signals (chunking variance, error rate from D.0's Command.errored, pace variability), mean-aggregated and bucketed against COGNITIVE_LOAD_LOW_MAX / COGNITIVE_LOAD_MEDIUM_MAX. Components missing data drop out of the mean rather than zeroing it. v0.1 thresholds; D.8 re-tunes once D.2-D.7 are stable. Confidence held at 0.60 (composite over soft sub-signals) and lowered to 0.40 below the 5-command sample-size floor.
This commit is contained in:
@@ -12,6 +12,7 @@ from decnet_behave_core.spec.envelope import Observation
|
|||||||
|
|
||||||
from decnet.profiler.behave_shell._ctx import SessionContext
|
from decnet.profiler.behave_shell._ctx import SessionContext
|
||||||
from decnet.profiler.behave_shell._features.cognitive import (
|
from decnet.profiler.behave_shell._features.cognitive import (
|
||||||
|
cognitive_load,
|
||||||
command_branch_diversity,
|
command_branch_diversity,
|
||||||
feedback_loop_engagement,
|
feedback_loop_engagement,
|
||||||
inter_command_consistency,
|
inter_command_consistency,
|
||||||
@@ -45,4 +46,5 @@ FEATURES: tuple[FeatureFn, ...] = (
|
|||||||
command_branch_diversity,
|
command_branch_diversity,
|
||||||
feedback_loop_engagement,
|
feedback_loop_engagement,
|
||||||
inter_command_consistency,
|
inter_command_consistency,
|
||||||
|
cognitive_load,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ Step 5: ``cognitive.inter_command_latency_class``.
|
|||||||
Step 6: ``cognitive.command_branch_diversity``.
|
Step 6: ``cognitive.command_branch_diversity``.
|
||||||
Step 7: ``cognitive.feedback_loop_engagement``.
|
Step 7: ``cognitive.feedback_loop_engagement``.
|
||||||
Step 8: ``cognitive.inter_command_consistency``.
|
Step 8: ``cognitive.inter_command_consistency``.
|
||||||
|
Step D.1: ``cognitive.cognitive_load``.
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
@@ -16,6 +17,10 @@ from decnet.profiler.behave_shell._ctx import SessionContext
|
|||||||
from decnet.profiler.behave_shell._features._emit import make_observation
|
from decnet.profiler.behave_shell._features._emit import make_observation
|
||||||
from decnet.profiler.behave_shell._thresholds import (
|
from decnet.profiler.behave_shell._thresholds import (
|
||||||
BRANCH_DIVERSITY_LINEAR_MIN,
|
BRANCH_DIVERSITY_LINEAR_MIN,
|
||||||
|
COGNITIVE_LOAD_CHUNKING_REF_CV,
|
||||||
|
COGNITIVE_LOAD_LOW_MAX,
|
||||||
|
COGNITIVE_LOAD_MEDIUM_MAX,
|
||||||
|
COGNITIVE_LOAD_PACE_REF_CV,
|
||||||
FEEDBACK_CORRELATION_MIN,
|
FEEDBACK_CORRELATION_MIN,
|
||||||
FEEDBACK_MIN_PAIRS,
|
FEEDBACK_MIN_PAIRS,
|
||||||
INTER_CMD_DELIBERATE_MAX,
|
INTER_CMD_DELIBERATE_MAX,
|
||||||
@@ -29,6 +34,24 @@ from decnet.profiler.behave_shell._thresholds import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _clip01(x: float) -> float:
|
||||||
|
if x < 0.0:
|
||||||
|
return 0.0
|
||||||
|
if x > 1.0:
|
||||||
|
return 1.0
|
||||||
|
return x
|
||||||
|
|
||||||
|
|
||||||
|
def _cv(xs: tuple[float, ...] | list[float]) -> float | None:
|
||||||
|
"""Coefficient of variation; ``None`` if undefined (n<2 or mean==0)."""
|
||||||
|
if len(xs) < 2:
|
||||||
|
return None
|
||||||
|
mean = statistics.fmean(xs)
|
||||||
|
if mean <= 0.0:
|
||||||
|
return None
|
||||||
|
return statistics.stdev(xs) / mean
|
||||||
|
|
||||||
|
|
||||||
def _bucket_inter_cmd_latency(median_iat: float) -> str:
|
def _bucket_inter_cmd_latency(median_iat: float) -> str:
|
||||||
if median_iat <= INTER_CMD_INSTANT_MAX:
|
if median_iat <= INTER_CMD_INSTANT_MAX:
|
||||||
return "instant"
|
return "instant"
|
||||||
@@ -156,6 +179,80 @@ def feedback_loop_engagement(ctx: SessionContext) -> Iterator[Observation]:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def cognitive_load(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.cognitive_load`` ∈ {low, medium, high}.

    Composite of three [0, 1]-clipped sub-signals, mean-aggregated:

    * **chunking** — median CV of intra-command IATs / reference CV.
      Fragmented mid-command typing → high contribution.
    * **errors** — fraction of commands whose post-execution output
      matched a canonical error fingerprint (``Command.errored`` from
      Step D.0). Failures pile load.
    * **pace variability** — CV of inter-command IATs / reference CV.
      A spread of think-pause durations → unsettled cadence → load.

    A component whose CV is undefined (too few samples, non-positive
    mean) is *dropped* from the mean rather than counted as 0.0, so the
    composite is normalised by the number of available components and a
    session with no inter-command pauses isn't punished for the silence.
    The error-rate component is always available once at least one
    command exists. Emission is skipped entirely when there are no
    commands — there's no honest answer.

    Confidence is 0.60 normally and 0.40 when the session has fewer than
    ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` commands.

    v0.1 thresholds; D.8 re-tunes once the rest of Phase D is stable.
    """
    if not ctx.commands:
        return

    # Component A: chunking variance — median within-command CV.
    # Commands whose CV is undefined are ignored.
    per_cmd_cvs: list[float] = [
        cv
        for cv in (_cv(cmd_iats) for cmd_iats in ctx.intra_command_iats)
        if cv is not None
    ]
    chunking_load: float | None = None
    if per_cmd_cvs:
        chunking_load = _clip01(
            statistics.median(per_cmd_cvs) / COGNITIVE_LOAD_CHUNKING_REF_CV
        )

    # Component B: error rate — always defined because ctx.commands is
    # non-empty here.
    errored_count = sum(1 for cmd in ctx.commands if cmd.errored)
    error_load: float = _clip01(errored_count / len(ctx.commands))

    # Component C: pace variability — CV of inter-command IATs.
    pace_cv = _cv(ctx.inter_cmd_iats)
    pace_load: float | None = None
    if pace_cv is not None:
        pace_load = _clip01(pace_cv / COGNITIVE_LOAD_PACE_REF_CV)

    # Mean over the components that actually have data. ``error_load`` is
    # always present, so this list is never empty.
    components = [
        part for part in (chunking_load, error_load, pace_load) if part is not None
    ]
    load = sum(components) / len(components)

    if load < COGNITIVE_LOAD_LOW_MAX:
        value = "low"
    elif load < COGNITIVE_LOAD_MEDIUM_MAX:
        value = "medium"
    else:
        value = "high"

    if len(ctx.commands) < MIN_COMMANDS_FOR_FULL_CONFIDENCE:
        # Too few commands for the sub-signals to be trustworthy.
        confidence = 0.40
    else:
        # Composite over three soft sub-signals — held below the
        # cap of single-source primitives. D.8 re-tunes.
        confidence = 0.60
    yield make_observation(
        ctx,
        primitive="cognitive.cognitive_load",
        value=value,
        confidence=confidence,
    )
|
||||||
|
|
||||||
|
|
||||||
def inter_command_consistency(ctx: SessionContext) -> Iterator[Observation]:
|
def inter_command_consistency(ctx: SessionContext) -> Iterator[Observation]:
|
||||||
"""Emit ``cognitive.inter_command_consistency``.
|
"""Emit ``cognitive.inter_command_consistency``.
|
||||||
|
|
||||||
|
|||||||
@@ -87,6 +87,27 @@ PAUSE_CV_BIMODAL_MIN: float = 1.50
|
|||||||
# ``cognitive_load`` must be reflected by editing the patterns tuple
|
# ``cognitive_load`` must be reflected by editing the patterns tuple
|
||||||
# (not a constant, so no boundary-band logic applies).
|
# (not a constant, so no boundary-band logic applies).
|
||||||
|
|
||||||
|
# ── cognitive.cognitive_load (Step D.1) ─────────────────────────────────────
# Composite ∈ [0, 1] over three sub-signals (each clipped to [0, 1]):
#
#   A = chunking_load         = median_intra_cmd_cv / CHUNKING_REF_CV
#   B = error_load            = errored_cmds / total_cmds
#   C = pace_variability_load = (stdev / mean of inter_cmd_iats) / PACE_REF_CV
#
# load = mean of the components that have data (A and C drop out of the
# mean when their CV is undefined); bucket:
#   load <  COGNITIVE_LOAD_LOW_MAX     → low
#   load <  COGNITIVE_LOAD_MEDIUM_MAX  → medium
#   else                               → high
#
# v0.1 thresholds — D.8 re-tunes once D.1-D.7 are stable. The reference
# CVs (CHUNKING_REF_CV / PACE_REF_CV) are the value at which that single
# component reaches a load contribution of 1.0; anything past the
# reference is clipped to 1.0, so an extreme CV cannot count for more
# than one full component.
COGNITIVE_LOAD_CHUNKING_REF_CV: float = 1.00
COGNITIVE_LOAD_PACE_REF_CV: float = 1.50
COGNITIVE_LOAD_LOW_MAX: float = 0.33
COGNITIVE_LOAD_MEDIUM_MAX: float = 0.67
|
||||||
|
|
||||||
# ── motor.keystroke_cadence (Step B.1) ──────────────────────────────────────
|
# ── motor.keystroke_cadence (Step B.1) ──────────────────────────────────────
|
||||||
# Typing bursts split at gaps > IKI_THINK_MAX_S so think-pauses between
|
# Typing bursts split at gaps > IKI_THINK_MAX_S so think-pauses between
|
||||||
# commands don't inflate the within-burst CV. Mirrors the prototype's
|
# commands don't inflate the within-burst CV. Mirrors the prototype's
|
||||||
|
|||||||
88
tests/profiler/behave_shell/test_cognitive_cognitive_load.py
Normal file
88
tests/profiler/behave_shell/test_cognitive_cognitive_load.py
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
"""Step D.1: ``cognitive.cognitive_load``.
|
||||||
|
|
||||||
|
Composite of three [0, 1]-clipped sub-signals (chunking variance, error
|
||||||
|
rate, pace variability) → bucketed against COGNITIVE_LOAD_LOW_MAX /
|
||||||
|
COGNITIVE_LOAD_MEDIUM_MAX. Tests pin each component at its extremes and
|
||||||
|
confirm the bucket falls where the math says.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from decnet.profiler.behave_shell import extract_session
|
||||||
|
from decnet.profiler.behave_shell._parse import AsciinemaEvent
|
||||||
|
|
||||||
|
|
||||||
|
def _of(observations: list, primitive: str):
|
||||||
|
obs = [o for o in observations if o.primitive == primitive]
|
||||||
|
assert len(obs) == 1, f"expected exactly one {primitive}, got {len(obs)}"
|
||||||
|
return obs[0]
|
||||||
|
|
||||||
|
|
||||||
|
def _typed(text: str, t0: float = 0.0, dt: float = 0.05) -> list[AsciinemaEvent]:
|
||||||
|
return [(t0 + i * dt, "i", c) for i, c in enumerate(text)]
|
||||||
|
|
||||||
|
|
||||||
|
def _metronomic_clean_session(n: int = 8) -> list[AsciinemaEvent]:
    """``n`` commands, perfectly even pacing, zero errors, fluent typing.

    Each command is ``ls`` followed by a carriage return, typed at a
    constant 0.05 s per key, with command ``i`` starting at ``i`` seconds.
    No output events are generated.
    """
    events: list[AsciinemaEvent] = []
    for i in range(n):
        events.extend(_typed("ls\r", t0=i * 1.0, dt=0.05))
    return events
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_commands_no_emission() -> None:
    """A lone keystroke with no carriage return yields no cognitive_load."""
    events: list[AsciinemaEvent] = [(0.0, "i", "a")]
    out = list(extract_session(events, sid="cl-empty"))
    assert [o for o in out if o.primitive == "cognitive.cognitive_load"] == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_metronomic_clean_session_emits_low() -> None:
    """Even pacing + clean output + steady typing → low load."""
    out = list(extract_session(_metronomic_clean_session(8), sid="cl-low"))
    obs = _of(out, "cognitive.cognitive_load")
    assert obs.value == "low"
|
||||||
|
|
||||||
|
|
||||||
|
def test_high_error_rate_drives_load_up() -> None:
    """Every command errored → load lands in medium or high.

    With error_load = 1.0 the three-way mean is at least 1/3 even if the
    other two components contribute 0, which is just above
    COGNITIVE_LOAD_LOW_MAX (0.33).
    """
    events: list[AsciinemaEvent] = []
    for i in range(8):
        events.extend(_typed("foo\r", t0=i * 1.0, dt=0.05))
        # Error output arrives after the command has been submitted.
        events.append((i * 1.0 + 0.5, "o", "bash: foo: command not found\n"))
    out = list(extract_session(events, sid="cl-err"))
    obs = _of(out, "cognitive.cognitive_load")
    assert obs.value in ("medium", "high")
|
||||||
|
|
||||||
|
|
||||||
|
def test_all_three_components_high_emits_high() -> None:
    """Saturate every component → load ≈ 1.0 → high."""
    events: list[AsciinemaEvent] = []
    # Burst-then-gap pacing maximises pace-CV; mid-command jitter
    # maximises chunking-CV; every command errors.
    # NOTE(review): each command spans ~2.1 s, so the starts at 0.0 / 0.1 /
    # 0.2 (and 30.0 / 30.1, 90.0 / 90.1) overlap and the event list is not
    # in timestamp order — confirm extract_session tolerates or sorts this.
    starts = [0.0, 0.1, 0.2, 30.0, 30.1, 60.0, 90.0, 90.1]
    for s in starts:
        # Mid-command jitter: 'a' at s, 'b' 0.01s later, 'c' 2s later, '\r' 2.05s later
        events.append((s, "i", "a"))
        events.append((s + 0.01, "i", "b"))
        events.append((s + 2.0, "i", "c"))
        events.append((s + 2.05, "i", "\r"))
        events.append((s + 2.10, "o", "bash: abc: command not found\n"))
    out = list(extract_session(events, sid="cl-high"))
    obs = _of(out, "cognitive.cognitive_load")
    assert obs.value == "high"
|
||||||
|
|
||||||
|
|
||||||
|
def test_low_sample_count_reduces_confidence() -> None:
    """A 3-command session reports lower confidence than an 8-command one."""
    short = list(extract_session(_metronomic_clean_session(3), sid="cl-short"))
    full = list(extract_session(_metronomic_clean_session(8), sid="cl-full"))
    s = _of(short, "cognitive.cognitive_load")
    f = _of(full, "cognitive.cognitive_load")
    assert s.confidence < f.confidence
|
||||||
|
|
||||||
|
|
||||||
|
def test_pii_no_command_bodies_in_observation() -> None:
    """The typed command text must not appear in the serialised observation."""
    events: list[AsciinemaEvent] = []
    for i in range(6):
        events.extend(_typed("supersecret\r", t0=i * 1.0, dt=0.05))
    out = list(extract_session(events, sid="cl-pii"))
    obs = _of(out, "cognitive.cognitive_load")
    assert "supersecret" not in obs.model_dump_json()
|
||||||
Reference in New Issue
Block a user