From 3fc6ea5f75a665f35f5c77abe6455b43b04d4ec2 Mon Sep 17 00:00:00 2001
From: anti
Date: Sun, 3 May 2026 07:54:13 -0400
Subject: [PATCH] feat(profiler/behave_shell): emit
 cognitive.command_branch_diversity
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BEHAVE-EXTRACTOR.md Phase A Step 6. Content-based
playbook-vs-adaptive split. Splits CLAUDE-FF (linear_playbook, ~10
distinct tools) from CLAUDE-CL (adaptive_branching, 5-6 tools with
curl re-invoked) per the 2026-05-02 empirical anchor.

* _features/cognitive.py:command_branch_diversity(ctx) emits one
  Observation in {linear_playbook, adaptive_branching, unknown}.
* unique_first_token_hashes / total_commands ratio. ≥ 0.80 →
  linear_playbook, otherwise adaptive_branching (the doc instructs
  bias-to-adaptive in the 0.60–0.80 middle band — that's the
  discriminative signal we actually want).
* < 5 commands → "unknown" at confidence 1.0 (the absence of data is
  itself a high-confidence answer per the registry's allowed
  vocabulary). A zero-command session skips emission entirely.

Tests cover unique-tokens → linear, repeated-tokens → adaptive,
middle band → adaptive (bias), under-floor → unknown @ 1.0, plus
PII regression: raw tokens never appear in the serialised
observation.
---
 .../behave_shell/_features/__init__.py        |  2 +
 .../behave_shell/_features/cognitive.py       | 41 ++++++++++++
 ...test_cognitive_command_branch_diversity.py | 64 +++++++++++++++++++
 3 files changed, 107 insertions(+)
 create mode 100644 tests/profiler/behave_shell/test_cognitive_command_branch_diversity.py

diff --git a/decnet/profiler/behave_shell/_features/__init__.py b/decnet/profiler/behave_shell/_features/__init__.py
index 7cd4640e..ae5d721f 100644
--- a/decnet/profiler/behave_shell/_features/__init__.py
+++ b/decnet/profiler/behave_shell/_features/__init__.py
@@ -12,6 +12,7 @@ from decnet_behave_core.spec.envelope import Observation
 
 from decnet.profiler.behave_shell._ctx import SessionContext
 from decnet.profiler.behave_shell._features.cognitive import (
+    command_branch_diversity,
     inter_command_latency_class,
 )
 from decnet.profiler.behave_shell._features.motor import (
@@ -25,4 +26,5 @@ FEATURES: tuple[FeatureFn, ...] = (
     input_modality,
     paste_burst_rate,
     inter_command_latency_class,
+    command_branch_diversity,
 )
diff --git a/decnet/profiler/behave_shell/_features/cognitive.py b/decnet/profiler/behave_shell/_features/cognitive.py
index cb14d0f7..68581160 100644
--- a/decnet/profiler/behave_shell/_features/cognitive.py
+++ b/decnet/profiler/behave_shell/_features/cognitive.py
@@ -15,6 +15,7 @@ from decnet_behave_core.spec.envelope import Observation
 from decnet.profiler.behave_shell._ctx import SessionContext
 from decnet.profiler.behave_shell._features._emit import make_observation
 from decnet.profiler.behave_shell._thresholds import (
+    BRANCH_DIVERSITY_LINEAR_MIN,
     INTER_CMD_DELIBERATE_MAX,
     INTER_CMD_INSTANT_MAX,
     INTER_CMD_LLM_HEAVYWEIGHT_MAX,
@@ -59,3 +60,43 @@ def inter_command_latency_class(ctx: SessionContext) -> Iterator[Observation]:
         value=bucket,
         confidence=confidence,
     )
+
+
+def command_branch_diversity(ctx: SessionContext) -> Iterator[Observation]:
+    """Emit ``cognitive.command_branch_diversity``.
+
+    Content-based discriminator (no timing): unique first-token ratio
+    over total commands. Splits CLAUDE-FF (linear_playbook) from
+    CLAUDE-CL (adaptive_branching). The empirical anchor on
+    2026-05-02: fire-and-forget runs ~10 distinct tools; closed-loop
+    runs 5-6 with ``curl`` re-invoked as the operator chases threads.
+    """
+    n = len(ctx.commands)
+    if n == 0:
+        # No commands at all → nothing honest to say. Skip emission.
+        return
+    if n < MIN_COMMANDS_FOR_FULL_CONFIDENCE:
+        # Registry admits "unknown"; absence of *enough* data is itself
+        # a high-confidence answer.
+        yield make_observation(
+            ctx,
+            primitive="cognitive.command_branch_diversity",
+            value="unknown",
+            confidence=1.0,
+        )
+        return
+    unique = len({c.first_token_hash for c in ctx.commands})
+    ratio = unique / n
+    if ratio >= BRANCH_DIVERSITY_LINEAR_MIN:
+        value = "linear_playbook"
+    else:
+        # Anything below the linear floor is treated as adaptive — the
+        # operator is reusing tools, the discriminative signal we
+        # actually want.
+        value = "adaptive_branching"
+    yield make_observation(
+        ctx,
+        primitive="cognitive.command_branch_diversity",
+        value=value,
+        confidence=0.80,
+    )
diff --git a/tests/profiler/behave_shell/test_cognitive_command_branch_diversity.py b/tests/profiler/behave_shell/test_cognitive_command_branch_diversity.py
new file mode 100644
index 00000000..a52a16b9
--- /dev/null
+++ b/tests/profiler/behave_shell/test_cognitive_command_branch_diversity.py
@@ -0,0 +1,64 @@
+"""Step 6: ``cognitive.command_branch_diversity``."""
+from __future__ import annotations
+
+from decnet.profiler.behave_shell import extract_session
+from decnet.profiler.behave_shell._parse import AsciinemaEvent
+
+
+def _of(observations: list, primitive: str):
+    obs = [o for o in observations if o.primitive == primitive]
+    assert len(obs) == 1, f"expected exactly one {primitive}, got {len(obs)}"
+    return obs[0]
+
+
+def _commands(first_tokens: list[str]) -> list[AsciinemaEvent]:
+    """One ``<tok> arg`` input event per token, timestamps 1.0 apart."""
+    events: list[AsciinemaEvent] = []
+    t = 0.0
+    for tok in first_tokens:
+        events.append((t, "i", f"{tok} arg\r"))
+        t += 1.0
+    return events
+
+
+def test_under_floor_emits_unknown_high_confidence() -> None:
+    out = list(extract_session(_commands(["ls", "ps", "id"]), sid="bd-low"))
+    obs = _of(out, "cognitive.command_branch_diversity")
+    assert obs.value == "unknown"
+    assert obs.confidence == 1.0
+
+
+def test_unique_first_tokens_emit_linear_playbook() -> None:
+    # 8 distinct tools — ratio 1.0 → linear_playbook
+    tokens = ["uname", "id", "whoami", "pwd", "ls", "ps", "netstat", "ss"]
+    out = list(extract_session(_commands(tokens), sid="bd-linear"))
+    obs = _of(out, "cognitive.command_branch_diversity")
+    assert obs.value == "linear_playbook"
+    assert obs.confidence == 0.80
+
+
+def test_repeated_first_tokens_emit_adaptive_branching() -> None:
+    # 8 commands, only 3 distinct — ratio 0.375 < 0.60
+    tokens = ["curl", "curl", "curl", "ls", "curl", "ls", "curl", "ps"]
+    out = list(extract_session(_commands(tokens), sid="bd-adaptive"))
+    obs = _of(out, "cognitive.command_branch_diversity")
+    assert obs.value == "adaptive_branching"
+
+
+def test_middle_band_biases_to_adaptive() -> None:
+    # 7 commands, 5 unique → ratio ≈ 0.71 — between 0.60 and 0.80.
+    # The doc instructs us to bias to adaptive in the ambiguous middle.
+    tokens = ["a", "b", "c", "d", "e", "a", "b"]
+    out = list(extract_session(_commands(tokens), sid="bd-mid"))
+    obs = _of(out, "cognitive.command_branch_diversity")
+    assert obs.value == "adaptive_branching"
+
+
+def test_pii_no_command_bodies_in_observation() -> None:
+    out = list(extract_session(_commands(
+        ["secret_arg_payload"] * 6,
+    ), sid="bd-pii"))
+    obs = _of(out, "cognitive.command_branch_diversity")
+    # Whatever the verdict, the raw first token must not leak into the JSON dump
+    serialised = obs.model_dump_json()
+    assert "secret_arg_payload" not in serialised