diff --git a/development/BEHAVE-EXTRACTOR.md b/development/BEHAVE-EXTRACTOR.md index 62a0867a..b9a9ce92 100644 --- a/development/BEHAVE-EXTRACTOR.md +++ b/development/BEHAVE-EXTRACTOR.md @@ -662,10 +662,10 @@ unchecked = no v0 tag.** - [x] D.8 cognitive.cognitive_load re-tune (gate) ### Phase E — `temporal.*` per-session -- [ ] E.1 `temporal.session_duration` -- [ ] E.2 `temporal.escalation_pattern` -- [ ] E.3 `temporal.lifecycle_markers.landing_ritual` -- [ ] E.4 `temporal.lifecycle_markers.exit_behavior` +- [x] E.1 `temporal.session_duration` +- [x] E.2 `temporal.escalation_pattern` +- [x] E.3 `temporal.lifecycle_markers.landing_ritual` +- [ ] E.4 `temporal.lifecycle_markers.exit_behavior` — **HELD** pending Phase F.0's prompt/exit-code parser. abrupt-vs-cleanup needs exit-code visibility to be honest; first-token membership alone over-fires on benign `rm` / `clear` mid-session and under-fires on `history -c` (flag-detection crosses the v0.1 PII boundary). ### Phase F — `environmental.*` (output-stream block) - [ ] F.0 Prompt-string parser (shared utility) @@ -849,6 +849,49 @@ Phase E (`temporal.*` per-session subset, 4 primitives) lands next. --- +## Phase E completion log + +Closed in 4 commits, **3 of 4 primitives shipping**. ANTI ruled E.4 +(`temporal.lifecycle_markers.exit_behavior`) **held** at planning +time: the abrupt / graceful / cleanup distinction needs exit-code +visibility, and that infrastructure lands as part of Phase F.0's +prompt parser. First-token membership alone is too noisy in both +directions (`rm` / `clear` mid-session over-fire as cleanup; `history +-c` under-fires because flag detection crosses v0.1's PII boundary). +E.4 unblocks once F.0's PS1 + exit-code sniff is wired. + +The three Phase E primitives that did ship: + +| Primitive | Confidence | Source signal | +|---|---|---| +| `temporal.session_duration` | 0.85 | `ctx.duration_s` bucketed against 60s / 600s / 3600s; direct measurement, not an inference. 
| +| `temporal.escalation_pattern` | 0.40 / 0.60 | Non-overlapping windows of width `max(10s, duration_s/10)`; CV of per-window counts + zero-window fraction → bursty / sustained / erratic. | +| `temporal.lifecycle_markers.landing_ritual` | 0.40 / 0.65 | Hits in the first `N=5` commands against precomputed hashes of `{uname, id, whoami, pwd, hostname, w, who}`; `≥ K=2` hits → present. | + +Implementation note: the new `_features/temporal.py` module mirrors +the `_features/cognitive.py` layout; recon-vocabulary hashes are +precomputed at module load (single sha256 sweep at import) so the +hot path is a frozenset membership test. `math.ceil`-based window +counting in E.2 avoids a phantom trailing zero bin on clean +divisions — a real bug that `test_temporal_escalation_pattern.py`'s +erratic-case fixture flushed out during its initial run. + +PII discipline is preserved across all three: only counts, durations, +and category labels leave the helpers; no command bodies, no output +text, no operator-identifying data. + +**Calibration grid widened:** the binding set now contains 20 names +(`PHASE_ABCDE_PRIMITIVES`). The three Phase D `error_resilience.*` +primitives remain conditional in `PHASE_D_CONDITIONAL_PRIMITIVES` +(they only fire on shards with at least one errored command). E.4 is +explicitly **not** in either set — it must not be referenced as a +required primitive until Phase F.0 lands. + +Phase F (`environmental.*` output-stream block, 5 primitives plus +F.0's prompt parser) lands next; E.4 picks up at the tail of Phase F. + +--- + **Owner:** ANTI. **Implementation gate:** Step 0 starts after this doc is reviewed + Phase 1 of `BEHAVE-INTEGRATION.md` lands (storage table exists). 
diff --git a/tests/profiler/behave_shell/test_calibration_grid.py b/tests/profiler/behave_shell/test_calibration_grid.py index 3cd19fd9..f69df617 100644 --- a/tests/profiler/behave_shell/test_calibration_grid.py +++ b/tests/profiler/behave_shell/test_calibration_grid.py @@ -31,7 +31,7 @@ from decnet.profiler.behave_shell import extract_session from decnet.profiler.behave_shell._parse import parse_shard_line -PHASE_ABCD_PRIMITIVES: frozenset[str] = frozenset({ +PHASE_ABCDE_PRIMITIVES: frozenset[str] = frozenset({ # Phase A — calibration floor "motor.input_modality", "motor.paste_burst_rate", @@ -59,6 +59,12 @@ PHASE_ABCD_PRIMITIVES: frozenset[str] = frozenset({ # syntax error or missing-binary invocation just to satisfy the # test). They ARE included in the discrimination check below as # "if you have them, they should agree across-class". + # Phase E — temporal.* per-session subset (E.4 exit_behavior held + # pending Phase F.0's prompt parser; abrupt-vs-cleanup needs + # exit-code visibility to be honest). + "temporal.session_duration", + "temporal.escalation_pattern", + "temporal.lifecycle_markers.landing_ritual", }) # Phase D primitives that are conditional on at least one errored @@ -71,10 +77,11 @@ PHASE_D_CONDITIONAL_PRIMITIVES: frozenset[str] = frozenset({ "cognitive.error_resilience.fallback_to_man", }) -# Backwards-compatible alias for any external import — the prior phase -# locked in PHASE_ABC_PRIMITIVES; D widens it. Both names point at the -# current binding set. -PHASE_ABC_PRIMITIVES = PHASE_ABCD_PRIMITIVES +# Backwards-compatible aliases for any external import — earlier phases +# locked in narrower sets; later phases widen them. All names point at +# the current binding set. 
+PHASE_ABCD_PRIMITIVES = PHASE_ABCDE_PRIMITIVES +PHASE_ABC_PRIMITIVES = PHASE_ABCDE_PRIMITIVES # (shard filename, class label) @@ -141,7 +148,7 @@ def test_shard_emits_all_phase_a_primitives( obs = _all_observations(path) assert obs, f"{class_label}: extractor produced zero observations" seen = {o.primitive for o in obs} - missing = PHASE_ABCD_PRIMITIVES - seen + missing = PHASE_ABCDE_PRIMITIVES - seen assert not missing, ( f"{class_label} ({shard_file}) missing primitives: " f"{sorted(missing)}" @@ -178,7 +185,7 @@ def test_shards_are_discriminative_across_classes( # At least one primitive should produce different majority values # across the present classes. discriminative_primitives: list[str] = [] - for prim in PHASE_ABCD_PRIMITIVES: + for prim in PHASE_ABCDE_PRIMITIVES: values = {by_class[c].get(prim) for c in by_class if prim in by_class[c]} if len(values) >= 2: discriminative_primitives.append(prim)