diff --git a/development/BEHAVE-EXTRACTOR.md b/development/BEHAVE-EXTRACTOR.md index a9732d28..f2204871 100644 --- a/development/BEHAVE-EXTRACTOR.md +++ b/development/BEHAVE-EXTRACTOR.md @@ -679,15 +679,15 @@ unchecked = no v0 tag.** - [x] E.1 `temporal.session_duration` - [x] E.2 `temporal.escalation_pattern` - [x] E.3 `temporal.lifecycle_markers.landing_ritual` -- [ ] E.4 `temporal.lifecycle_markers.exit_behavior` — **HELD** pending Phase F.0's prompt/exit-code parser. abrupt-vs-cleanup needs exit-code visibility to be honest; first-token membership alone over-fires on benign `rm` / `clear` mid-session and under-fires on `history -c` (flag-detection crosses the v0.1 PII boundary). +- [x] E.4 `temporal.lifecycle_markers.exit_behavior` — unblocked + landed in Phase F (uses `Command.followed_by_prompt` from F.0) ### Phase F — `environmental.*` (output-stream block) -- [ ] F.0 Prompt-string parser (shared utility) — also unblocks **E.4** (held) and subsumes **D.0** ANSI/error helpers -- [ ] F.1 `environmental.shell_type` -- [ ] F.2 `environmental.terminal_multiplexer` -- [ ] F.3 `environmental.locale` -- [ ] F.4 `environmental.keyboard_layout` -- [ ] F.5 `environmental.numpad_usage` +- [x] F.0 Prompt-string parser (shared utility) — unblocked **E.4**; **D.0 enriched, not subsumed** (regex error helpers stay) +- [x] F.1 `environmental.shell_type` +- [x] F.2 `environmental.terminal_multiplexer` +- [x] F.3 `environmental.locale` +- [x] F.4 `environmental.keyboard_layout` (PII boundary lifted by ANTI; emits all 4 registry values) +- [x] F.5 `environmental.numpad_usage` ### Phase G — `operational.*` + `emotional_valence.*` (soft block) - [ ] G.0 Command-intent lexicon (`_features/_intent.py`) @@ -906,6 +906,56 @@ F.0's prompt parser) lands next; E.4 picks up at the tail of Phase F. --- +## Phase F completion log + +Closed in 8 commits. The largest phase in the plan; the held E.4 +(`temporal.lifecycle_markers.exit_behavior`) lifted at the tail. 
+ +**F.0 — prompt-line detector (no primitive).** PS1 prompt-line +detection over ANSI-stripped output. New `PromptLine` dataclass on +`SessionContext.prompt_lines` and `Command.followed_by_prompt`, both +populated during the existing single-pass output-window walk. Prompt lines are capped +at `PROMPT_LINE_MAX_CHARS = 256` to bound memory. + +**Reversal of the original BEHAVE-EXTRACTOR.md F.0 hint:** D.0 is +**enriched, not subsumed**. The regex error fingerprints catch errors +even when PS1 echo is suppressed (custom prompts, non-interactive +exec) where prompt-based detection would miss them. F.0 is purely +additive. + +**PII boundary lift.** ANTI authorised dropping the v0.1 PII boundary +for Phase F: PromptLine retains hostnames / cwd / etc. (capped), +parsed locale envvar values ride on observations, F.4 retains typed +bigram/unigram histograms on `SessionContext`. The discipline kept is +"no FULL command bodies, no FULL output bodies in observations" — +PromptLine and histograms live on ctx but are never serialised into +observation values; only derived primitive values (`bash`, `en-US`, +`qwerty`, `present`) leave the engine. 
+ +The five Phase F primitives + carry-over E.4: + +| Primitive | Confidence | Source signal | +|---|---|---| +| `environmental.shell_type` | 0.40 / 0.75 | per-prompt-line classification; mode of suffix character with `>` disambiguated by content (`PS ` → powershell, `C:\` → cmd.exe, else fish) | +| `environmental.terminal_multiplexer` | 0.55 / 0.85 | scan RAW output for tmux markers (DCS passthrough, focus-reporting, window-title), screen markers (DCS, screen-OSC); both → prefer tmux | +| `environmental.locale` | 0.80 | regex match `LANG=` / `LC_ALL=` / `LC_CTYPE=` in stripped output; LC_ALL > LANG > LC_CTYPE; POSIX → BCP-47 normalisation | +| `environmental.keyboard_layout` | 0.40 / 0.55 | typed bigram/unigram histograms; layout-artefact unigrams (`q`, `z`/`y`) take priority over English-bigram saturation | +| `environmental.numpad_usage` | 0.50 | sliding window over single-char digit input events; ≥4 contiguous events with all-fast IATs (≤50ms) → detected | +| `temporal.lifecycle_markers.exit_behavior` | 0.45 / 0.65 | resolution of the E.4 hold; uses `Command.followed_by_prompt` to distinguish `abrupt` from `cleanup`/`graceful` | + +**Calibration grid widened:** the binding set now contains 25 names +(`PHASE_ABCDEF_PRIMITIVES`). The three Phase D `error_resilience.*` +primitives stay in `PHASE_D_CONDITIONAL_PRIMITIVES`; +`environmental.locale` joins a new `PHASE_F_CONDITIONAL_PRIMITIVES` +since it only fires on shards containing an env / locale dump. + +**Tier-A corpus delta:** 25 of 37 Tier-A primitives now emit. Phase G +(`operational.*` + `emotional_valence.*`, 8 primitives + the +command-intent lexicon) lands next. Phase H is full-corpus lockdown +and v0 release. + +--- + **Owner:** ANTI. **Implementation gate:** Step 0 starts after this doc is reviewed + Phase 1 of `BEHAVE-INTEGRATION.md` lands (storage table exists). 
diff --git a/tests/profiler/behave_shell/test_calibration_grid.py b/tests/profiler/behave_shell/test_calibration_grid.py index f69df617..8ddee2e7 100644 --- a/tests/profiler/behave_shell/test_calibration_grid.py +++ b/tests/profiler/behave_shell/test_calibration_grid.py @@ -31,7 +31,7 @@ from decnet.profiler.behave_shell import extract_session from decnet.profiler.behave_shell._parse import parse_shard_line -PHASE_ABCDE_PRIMITIVES: frozenset[str] = frozenset({ +PHASE_ABCDEF_PRIMITIVES: frozenset[str] = frozenset({ # Phase A — calibration floor "motor.input_modality", "motor.paste_burst_rate", @@ -39,32 +39,32 @@ PHASE_ABCDE_PRIMITIVES: frozenset[str] = frozenset({ "cognitive.command_branch_diversity", "cognitive.feedback_loop_engagement", "cognitive.inter_command_consistency", - # Phase B — motor.* completion (lands one primitive per commit) + # Phase B — motor.* completion "motor.keystroke_cadence", "motor.motor_stability", "motor.error_correction", "motor.command_chunking", - # Phase C — motor.shell_mastery.* (lands one primitive per commit) + # Phase C — motor.shell_mastery.* "motor.shell_mastery.tab_completion", "motor.shell_mastery.shortcut_usage", "motor.shell_mastery.pipe_chaining_depth", - # Phase D — cognitive.* completion (one primitive per commit) + # Phase D — cognitive.* completion (error_resilience.* are + # conditional, see PHASE_D_CONDITIONAL_PRIMITIVES below) "cognitive.cognitive_load", "cognitive.exploration_style", "cognitive.planning_depth", "cognitive.tool_vocabulary", - # Phase D — error_resilience.* primitives only fire on shards with - # at least one errored command. They're NOT in the per-shard hard - # gate (which would force every calibration shard to contain a - # syntax error or missing-binary invocation just to satisfy the - # test). They ARE included in the discrimination check below as - # "if you have them, they should agree across-class". 
- # Phase E — temporal.* per-session subset (E.4 exit_behavior held - # pending Phase F.0's prompt parser; abrupt-vs-cleanup needs - # exit-code visibility to be honest). + # Phase E — temporal.* per-session subset "temporal.session_duration", "temporal.escalation_pattern", "temporal.lifecycle_markers.landing_ritual", + # Phase F — environmental.* output-stream block + carry-over E.4 + # (locale is conditional, see PHASE_F_CONDITIONAL_PRIMITIVES) + "environmental.shell_type", + "environmental.terminal_multiplexer", + "environmental.keyboard_layout", + "environmental.numpad_usage", + "temporal.lifecycle_markers.exit_behavior", }) # Phase D primitives that are conditional on at least one errored @@ -77,11 +77,20 @@ PHASE_D_CONDITIONAL_PRIMITIVES: frozenset[str] = frozenset({ "cognitive.error_resilience.fallback_to_man", }) +# Phase F primitives conditional on shard content. ``environmental.locale`` +# fires only when the shard's output contains an env / locale dump +# (LANG=, LC_ALL=, LC_CTYPE=). It's tracked here, not in the per-shard +# hard gate. +PHASE_F_CONDITIONAL_PRIMITIVES: frozenset[str] = frozenset({ + "environmental.locale", +}) + # Backwards-compatible aliases for any external import — earlier phases # locked in narrower sets; later phases widen them. All names point at # the current binding set. 
-PHASE_ABCD_PRIMITIVES = PHASE_ABCDE_PRIMITIVES -PHASE_ABC_PRIMITIVES = PHASE_ABCDE_PRIMITIVES +PHASE_ABCDE_PRIMITIVES = PHASE_ABCDEF_PRIMITIVES +PHASE_ABCD_PRIMITIVES = PHASE_ABCDEF_PRIMITIVES +PHASE_ABC_PRIMITIVES = PHASE_ABCDEF_PRIMITIVES # (shard filename, class label) @@ -148,7 +157,7 @@ def test_shard_emits_all_phase_a_primitives( obs = _all_observations(path) assert obs, f"{class_label}: extractor produced zero observations" seen = {o.primitive for o in obs} - missing = PHASE_ABCDE_PRIMITIVES - seen + missing = PHASE_ABCDEF_PRIMITIVES - seen assert not missing, ( f"{class_label} ({shard_file}) missing primitives: " f"{sorted(missing)}" @@ -185,7 +194,7 @@ def test_shards_are_discriminative_across_classes( # At least one primitive should produce different majority values # across the present classes. discriminative_primitives: list[str] = [] - for prim in PHASE_ABCDE_PRIMITIVES: + for prim in PHASE_ABCDEF_PRIMITIVES: values = {by_class[c].get(prim) for c in by_class if prim in by_class[c]} if len(values) >= 2: discriminative_primitives.append(prim)