DECNET/decnet/profiler/behave_shell/_features/cognitive.py

"""``cognitive.*`` feature functions.

Step 5: ``cognitive.inter_command_latency_class``.
Step 6: ``cognitive.command_branch_diversity``.
Step 7: ``cognitive.feedback_loop_engagement``.
Step 8: ``cognitive.inter_command_consistency``.
"""
from __future__ import annotations

import statistics
from typing import Iterator

from decnet_behave_core.spec.envelope import Observation

from decnet.profiler.behave_shell._ctx import SessionContext
from decnet.profiler.behave_shell._features._emit import make_observation
from decnet.profiler.behave_shell._thresholds import (
    BRANCH_DIVERSITY_LINEAR_MIN,
    FEEDBACK_CORRELATION_MIN,
    FEEDBACK_MIN_PAIRS,
    INTER_CMD_DELIBERATE_MAX,
    INTER_CMD_INSTANT_MAX,
    INTER_CMD_LLM_HEAVYWEIGHT_MAX,
    INTER_CMD_LLM_LIGHTWEIGHT_MAX,
    INTER_CMD_TYPING_MAX,
    MIN_COMMANDS_FOR_FULL_CONFIDENCE,
    PAUSE_CV_BIMODAL_MIN,
    PAUSE_CV_METRONOMIC_MAX,
)


def _bucket_inter_cmd_latency(median_iat: float) -> str:
    """Translate a median inter-command gap into its latency-class label.

    The ceilings are tried in the order listed below and each one is
    inclusive (``<=``). The first ceiling that admits ``median_iat``
    decides the label; a gap above every ceiling is ``"long"``.
    """
    ladder = (
        (INTER_CMD_INSTANT_MAX, "instant"),
        (INTER_CMD_TYPING_MAX, "typing_speed"),
        (INTER_CMD_DELIBERATE_MAX, "deliberate"),
        (INTER_CMD_LLM_LIGHTWEIGHT_MAX, "llm_lightweight"),
        (INTER_CMD_LLM_HEAVYWEIGHT_MAX, "llm_heavyweight"),
    )
    for ceiling, label in ladder:
        if median_iat <= ceiling:
            return label
    # Fell through every ceiling.
    return "long"


def inter_command_latency_class(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.inter_command_latency_class``.

    Buckets the median of ``ctx.inter_cmd_iats`` — the operator's
    *thinking pace* between commands — against the calibrated
    ``INTER_CMD_*`` thresholds. Splits LW-sim / CLAUDE-FF / CLAUDE-CL.

    Yields nothing when there are no inter-command gaps; otherwise
    yields exactly one observation.
    """
    gaps = ctx.inter_cmd_iats
    if not gaps:
        return
    label = _bucket_inter_cmd_latency(statistics.median(gaps))
    # Sample-size honesty: sessions with fewer commands than
    # MIN_COMMANDS_FOR_FULL_CONFIDENCE report at half confidence.
    certainty = (
        0.40 if len(ctx.commands) < MIN_COMMANDS_FOR_FULL_CONFIDENCE else 0.80
    )
    yield make_observation(
        ctx,
        primitive="cognitive.inter_command_latency_class",
        value=label,
        confidence=certainty,
    )


def command_branch_diversity(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.command_branch_diversity``.

    A content-only discriminator (timing is not consulted): the share
    of distinct ``first_token_hash`` values among all commands. It
    separates CLAUDE-FF (``linear_playbook``) from CLAUDE-CL
    (``adaptive_branching``). Empirical anchor, 2026-05-02:
    fire-and-forget runs touch ~10 distinct tools, while closed-loop
    runs touch 5-6 and keep re-invoking ``curl`` as the operator
    chases threads.

    Outcomes:

    * no commands → nothing is emitted;
    * fewer than ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` commands →
      ``unknown`` at confidence 1.0;
    * otherwise → ``linear_playbook`` or ``adaptive_branching`` at
      confidence 0.80.
    """
    total = len(ctx.commands)
    if total == 0:
        # Nothing honest to say about an empty session.
        return

    if total < MIN_COMMANDS_FOR_FULL_CONFIDENCE:
        # The registry admits "unknown"; not having *enough* data is
        # itself an answer we are certain about.
        label, certainty = "unknown", 1.0
    else:
        distinct_heads = set()
        for command in ctx.commands:
            distinct_heads.add(command.first_token_hash)
        share = len(distinct_heads) / total
        # At or above the linear floor the session reads as a playbook.
        # Anything below it means tools are being reused, which is the
        # adaptive signal this primitive exists to surface.
        label = (
            "linear_playbook"
            if share >= BRANCH_DIVERSITY_LINEAR_MIN
            else "adaptive_branching"
        )
        certainty = 0.80

    yield make_observation(
        ctx,
        primitive="cognitive.command_branch_diversity",
        value=label,
        confidence=certainty,
    )


def feedback_loop_engagement(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.feedback_loop_engagement``.

    Computes the Pearson correlation between ``output_per_cmd[i]``
    (bytes the operator saw before the next command) and
    ``inter_cmd_iats[i]`` (the pause that followed). closed_loop
    operators pause longer after reading more; fire_and_forget
    operators pace themselves independently of output. This CUTS
    ACROSS the LLM/human axis: closed-loop LLMs and reading humans
    both land on closed_loop.

    Outcomes:

    * no output events, or fewer than ``FEEDBACK_MIN_PAIRS`` pairs →
      ``unknown`` at confidence 1.0, unless the session has no commands
      at all, in which case nothing is emitted;
    * correlation cannot be computed → ``unknown`` at confidence 1.0;
    * otherwise → ``closed_loop`` when r exceeds
      ``FEEDBACK_CORRELATION_MIN``, else ``fire_and_forget``, both at
      confidence 0.75.
    """
    samples = list(zip(ctx.output_per_cmd, ctx.inter_cmd_iats))

    if not ctx.output_events or len(samples) < FEEDBACK_MIN_PAIRS:
        # No honest correlation is possible. Stay silent for a session
        # with no commands; otherwise say "unknown" with certainty.
        if ctx.commands:
            yield make_observation(
                ctx,
                primitive="cognitive.feedback_loop_engagement",
                value="unknown",
                confidence=1.0,
            )
        return

    seen_bytes = [float(seen) for seen, _ in samples]
    pauses = [float(pause) for _, pause in samples]
    try:
        r = statistics.correlation(seen_bytes, pauses)
    except statistics.StatisticsError:
        # Constant series on either axis — correlation undefined.
        label, certainty = "unknown", 1.0
    else:
        label = (
            "closed_loop" if r > FEEDBACK_CORRELATION_MIN else "fire_and_forget"
        )
        certainty = 0.75

    yield make_observation(
        ctx,
        primitive="cognitive.feedback_loop_engagement",
        value=label,
        confidence=certainty,
    )


def inter_command_consistency(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.inter_command_consistency``.

    Classifies the coefficient of variation (sample stdev / mean) of
    ``ctx.inter_cmd_iats``:

    * ``metronomic`` — CV below ``PAUSE_CV_METRONOMIC_MAX`` (nominally
      0.40) → LLM-pure. Empirical anchor: LLM-simulated session
      CV ≈ 0.24 in this corpus.
    * ``variable`` — CV between the two thresholds → human. Empirical
      anchor: human session CV ≈ 0.94.
    * ``bimodal`` — CV at or above ``PAUSE_CV_BIMODAL_MIN`` (nominally
      1.50) → LLM-assisted human, heuristic. v0.1 uses CV only; true
      bimodal detection (Hartigan dip / two-peak) is filed for v0.2 per
      the registry's ``notes:`` field.

    Nothing is emitted with fewer than two gaps (the sample stdev needs
    two points) or when the mean gap is not positive (the ratio would
    be undefined or meaningless).
    """
    gaps = ctx.inter_cmd_iats
    if len(gaps) < 2:
        return

    average = statistics.fmean(gaps)
    if average <= 0.0:
        return

    spread = statistics.stdev(gaps) / average
    # The metronomic test runs first, then bimodal; "variable" is the
    # remainder.
    label = (
        "metronomic"
        if spread < PAUSE_CV_METRONOMIC_MAX
        else "bimodal"
        if spread >= PAUSE_CV_BIMODAL_MIN
        else "variable"
    )

    # Short sessions report at reduced confidence.
    if len(ctx.commands) < MIN_COMMANDS_FOR_FULL_CONFIDENCE:
        certainty = 0.40
    else:
        certainty = 0.75

    yield make_observation(
        ctx,
        primitive="cognitive.inter_command_consistency",
        value=label,
        confidence=certainty,
    )