BEHAVE-EXTRACTOR.md Phase A Step 8. Dispersion / bimodality of
inter-command pauses. HUMAN-bimodal vs LLM-metronomic.
* _features/cognitive.py:inter_command_consistency(ctx) emits one
Observation in {metronomic, variable, bimodal}.
* CV = stdev / mean of ctx.inter_cmd_iats. CV < 0.40 → metronomic
(LLM-pure; corpus anchor 0.24); CV ≥ 1.50 → bimodal heuristic
(LLM-assisted human; v0.1 placeholder, true bimodal via Hartigan
dip is registry-flagged for v0.2); else → variable (human;
corpus anchor 0.94).
* < 2 IATs or a non-positive mean → skip emission. < 5 commands roughly
halves confidence (0.40 instead of 0.75) per sample-size honesty.
Tests: too-few IATs → no emission, uniform → metronomic,
human-like dispersion → variable, extreme bursts+gaps → bimodal,
low-sample-count → reduced confidence.
Step 8 closes the six-primitive calibration floor for Phase A.
Step 9 (calibration grid lockdown) is the gate that pins it.
194 lines
6.4 KiB
Python
194 lines
6.4 KiB
Python
"""``cognitive.*`` feature functions.

Step 5: ``cognitive.inter_command_latency_class``.
Step 6: ``cognitive.command_branch_diversity``.
Step 7: ``cognitive.feedback_loop_engagement``.
Step 8: ``cognitive.inter_command_consistency``.
"""
|
|
from __future__ import annotations
|
|
|
|
import statistics
|
|
from typing import Iterator
|
|
|
|
from decnet_behave_core.spec.envelope import Observation
|
|
|
|
from decnet.profiler.behave_shell._ctx import SessionContext
|
|
from decnet.profiler.behave_shell._features._emit import make_observation
|
|
from decnet.profiler.behave_shell._thresholds import (
|
|
BRANCH_DIVERSITY_LINEAR_MIN,
|
|
FEEDBACK_CORRELATION_MIN,
|
|
FEEDBACK_MIN_PAIRS,
|
|
INTER_CMD_DELIBERATE_MAX,
|
|
INTER_CMD_INSTANT_MAX,
|
|
INTER_CMD_LLM_HEAVYWEIGHT_MAX,
|
|
INTER_CMD_LLM_LIGHTWEIGHT_MAX,
|
|
INTER_CMD_TYPING_MAX,
|
|
MIN_COMMANDS_FOR_FULL_CONFIDENCE,
|
|
PAUSE_CV_BIMODAL_MIN,
|
|
PAUSE_CV_METRONOMIC_MAX,
|
|
)
|
|
|
|
|
|
def _bucket_inter_cmd_latency(median_iat: float) -> str:
    """Translate a median inter-command pause into a latency bucket name.

    The ceilings are tried in ascending-label order and each comparison is
    inclusive (``<=``): the first ceiling the median does not exceed names
    the bucket. A median above every ceiling is reported as ``"long"``.
    """
    # Ordered ladder of (inclusive ceiling, bucket label); order matters
    # because the first match wins.
    ladder = (
        (INTER_CMD_INSTANT_MAX, "instant"),
        (INTER_CMD_TYPING_MAX, "typing_speed"),
        (INTER_CMD_DELIBERATE_MAX, "deliberate"),
        (INTER_CMD_LLM_LIGHTWEIGHT_MAX, "llm_lightweight"),
        (INTER_CMD_LLM_HEAVYWEIGHT_MAX, "llm_heavyweight"),
    )
    for ceiling, label in ladder:
        if median_iat <= ceiling:
            return label
    return "long"
|
|
|
|
|
|
def inter_command_latency_class(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.inter_command_latency_class``.

    Buckets the median of ``ctx.inter_cmd_iats`` — the operator's
    *thinking pace* between commands — against the calibrated latency
    thresholds. Splits LW-sim / CLAUDE-FF / CLAUDE-CL.

    Yields nothing when the session has no inter-command pauses;
    otherwise yields exactly one observation.
    """
    pauses = ctx.inter_cmd_iats
    if not pauses:
        return

    label = _bucket_inter_cmd_latency(statistics.median(pauses))

    # Sample-size honesty: sessions with fewer than
    # MIN_COMMANDS_FOR_FULL_CONFIDENCE commands get the reduced confidence.
    short_session = len(ctx.commands) < MIN_COMMANDS_FOR_FULL_CONFIDENCE

    yield make_observation(
        ctx,
        primitive="cognitive.inter_command_latency_class",
        value=label,
        confidence=0.40 if short_session else 0.80,
    )
|
|
|
|
|
|
def command_branch_diversity(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.command_branch_diversity``.

    Content-based discriminator (no timing): the ratio of distinct
    ``first_token_hash`` values to the total number of commands. Splits
    CLAUDE-FF (``linear_playbook``) from CLAUDE-CL
    (``adaptive_branching``). The empirical anchor on 2026-05-02:
    fire-and-forget runs ~10 distinct tools; closed-loop runs 5-6 with
    ``curl`` re-invoked as the operator chases threads.

    Outcomes:

    * no commands → nothing is emitted;
    * fewer than ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` commands →
      ``unknown`` at confidence 1.0;
    * otherwise → ``linear_playbook`` when the ratio reaches
      ``BRANCH_DIVERSITY_LINEAR_MIN``, else ``adaptive_branching``, both
      at confidence 0.80.
    """
    total = len(ctx.commands)
    if total == 0:
        # Nothing was typed, so there is nothing honest to report.
        return

    if total < MIN_COMMANDS_FOR_FULL_CONFIDENCE:
        # The registry admits "unknown"; lacking *enough* data is itself
        # an answer we can give with full confidence.
        label, confidence = "unknown", 1.0
    else:
        distinct_heads = {cmd.first_token_hash for cmd in ctx.commands}
        if len(distinct_heads) / total >= BRANCH_DIVERSITY_LINEAR_MIN:
            label = "linear_playbook"
        else:
            # Below the linear floor the operator is reusing tools, which
            # is the discriminative signal this primitive is after.
            label = "adaptive_branching"
        confidence = 0.80

    yield make_observation(
        ctx,
        primitive="cognitive.command_branch_diversity",
        value=label,
        confidence=confidence,
    )
|
|
|
|
|
|
def feedback_loop_engagement(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.feedback_loop_engagement``.

    Pearson correlation between ``output_per_cmd[i]`` (bytes the operator
    saw before the next command) and ``inter_cmd_iats[i]`` (the pause that
    followed). closed_loop operators read more before pausing more;
    fire_and_forget operators pace independently of output. CUTS ACROSS
    the LLM/human axis — closed-loop LLMs and reading humans both score
    closed_loop.

    Outcomes:

    * no output events, or fewer than ``FEEDBACK_MIN_PAIRS`` pairs:
      nothing is emitted when the session also has no commands, otherwise
      ``unknown`` at confidence 1.0 (no honest correlation possible);
    * correlation undefined (``statistics.StatisticsError``, e.g. a
      constant series on either axis) → ``unknown`` at confidence 1.0;
    * otherwise → ``closed_loop`` when r exceeds
      ``FEEDBACK_CORRELATION_MIN``, else ``fire_and_forget``, at
      confidence 0.75.
    """
    samples = list(zip(ctx.output_per_cmd, ctx.inter_cmd_iats))
    insufficient = (
        not ctx.output_events or len(samples) < FEEDBACK_MIN_PAIRS
    )
    if insufficient and not ctx.commands:
        # Empty session: stay silent rather than report "unknown".
        return

    r = None  # stays None whenever no usable correlation exists
    if not insufficient:
        seen_bytes = [float(out) for out, _ in samples]
        following_pauses = [float(gap) for _, gap in samples]
        try:
            r = statistics.correlation(seen_bytes, following_pauses)
        except statistics.StatisticsError:
            # Constant series on either axis — correlation undefined.
            r = None

    if r is None:
        yield make_observation(
            ctx,
            primitive="cognitive.feedback_loop_engagement",
            value="unknown",
            confidence=1.0,
        )
        return

    yield make_observation(
        ctx,
        primitive="cognitive.feedback_loop_engagement",
        value="closed_loop" if r > FEEDBACK_CORRELATION_MIN else "fire_and_forget",
        confidence=0.75,
    )
|
|
|
|
|
|
def inter_command_consistency(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.inter_command_consistency``.

    Classifies the coefficient of variation (sample stdev / mean) of
    ``ctx.inter_cmd_iats``:

    * ``metronomic`` (CV < ``PAUSE_CV_METRONOMIC_MAX``, documented as
      0.40) → LLM-pure. Empirical anchor: LLM-simulated session
      CV ≈ 0.24 in this corpus.
    * ``bimodal`` (CV ≥ ``PAUSE_CV_BIMODAL_MIN``, documented as 1.50) →
      LLM-assisted human, heuristic. v0.1 uses CV-only; true bimodal
      detection (Hartigan dip / two-peak) is filed for v0.2 per the
      registry's ``notes:`` field.
    * ``variable`` (everything in between) → human. Empirical anchor:
      human session CV ≈ 0.94.

    Nothing is emitted when there are fewer than two pauses or when their
    mean is not positive. Confidence is 0.40 for sessions with fewer than
    ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` commands and 0.75 otherwise.
    """
    pauses = ctx.inter_cmd_iats
    if len(pauses) < 2:
        # A sample standard deviation needs at least two data points.
        return

    average = statistics.fmean(pauses)
    if average <= 0.0:
        # CV is meaningless (or a division by zero) without a positive mean.
        return

    dispersion = statistics.stdev(pauses) / average

    # The metronomic test runs first, then the bimodal one; anything that
    # satisfies neither is "variable".
    label = (
        "metronomic"
        if dispersion < PAUSE_CV_METRONOMIC_MAX
        else "bimodal"
        if dispersion >= PAUSE_CV_BIMODAL_MIN
        else "variable"
    )

    # Sample-size honesty: short sessions get the reduced confidence.
    if len(ctx.commands) < MIN_COMMANDS_FOR_FULL_CONFIDENCE:
        confidence = 0.40
    else:
        confidence = 0.75

    yield make_observation(
        ctx,
        primitive="cognitive.inter_command_consistency",
        value=label,
        confidence=confidence,
    )
|