feat(profiler/behave_shell): G.1 operational.objective

Per-command intent classification via the G.0 lexicon (`destructive > persistence > exfil > lateral > recon` precedence); majority vote across classified commands. Skip emission below INTENT_MIN_COMMANDS=3 classified hits. Confidence is 0.40 below INTENT_FULL_CONFIDENCE_MIN=6 classified hits and 0.60 at or above it.
2026-05-08 16:28:45 -04:00
parent 289a64014c
commit c11f3605be
4 changed files with 232 additions and 0 deletions
--- a/decnet/profiler/behave_shell/_features/__init__.py
+++ b/decnet/profiler/behave_shell/_features/__init__.py
@@ -31,6 +31,9 @@ from decnet.profiler.behave_shell._features.environmental import (
    shell_type,
    terminal_multiplexer,
 )
+from decnet.profiler.behave_shell._features.operational import (
+    objective,
+)
 from decnet.profiler.behave_shell._features.temporal import (
    escalation_pattern,
    exit_behavior,
@@ -81,4 +84,5 @@ FEATURES: tuple[FeatureFn, ...] = (
    locale,
    keyboard_layout,
    numpad_usage,
+    objective,
 )
--- a/decnet/profiler/behave_shell/_features/operational.py
+++ b/decnet/profiler/behave_shell/_features/operational.py
@@ -0,0 +1,62 @@
+"""``operational.*`` feature functions (Phase G).
+
+Step G.1: ``operational.objective``.
+Step G.2: ``operational.opsec_discipline`` (lands later).
+Step G.3: ``operational.cleanup_behavior`` (lands later).
+Step G.4: ``operational.multi_actor_indicators`` (lands later).
+"""
+from __future__ import annotations
+
+import collections
+from typing import Iterator
+
+from decnet_behave_core.spec.envelope import Observation
+
+from decnet.profiler.behave_shell._ctx import SessionContext
+from decnet.profiler.behave_shell._features._emit import make_observation
+from decnet.profiler.behave_shell._intent import classify_intent
+from decnet.profiler.behave_shell._thresholds import (
+    INTENT_FULL_CONFIDENCE_MIN,
+    INTENT_MIN_COMMANDS,
+)
+
+
+def objective(ctx: SessionContext) -> Iterator[Observation]:
+    """Emit ``operational.objective`` ∈ {recon, exfil, persistence,
+    lateral, destructive}.
+
+    Walk every command's ``first_token_hash`` through
+    :func:`classify_intent` (fixed precedence:
+    ``destructive > persistence > exfil > lateral > recon``).
+    Commands that don't classify (token not in any set) are skipped —
+    the registry has no ``unknown`` value here, so a session of pure
+    ``vim`` / ``ls`` operations is allowed to fall through and emit
+    ``recon`` only if at least :data:`INTENT_MIN_COMMANDS` commands
+    actually classify.
+
+    Skip emission when fewer than ``INTENT_MIN_COMMANDS`` classified
+    hits — too thin to call. Otherwise majority vote (ties broken by
+    precedence order via ``most_common(1)``-stable sort over the
+    insertion order, which mirrors the precedence walk).
+
+    Confidence: 0.40 below :data:`INTENT_FULL_CONFIDENCE_MIN`; 0.60
+    above. v0.1 lexicon — corpus tuning revisits in v0.2.
+    """
+    if not ctx.commands:
+        return
+    counter: collections.Counter[str] = collections.Counter()
+    for cmd in ctx.commands:
+        label = classify_intent(cmd.first_token_hash)
+        if label is not None:
+            counter[label] += 1
+    n_classified = sum(counter.values())
+    if n_classified < INTENT_MIN_COMMANDS:
+        return
+    value = counter.most_common(1)[0][0]
+    confidence = 0.60 if n_classified >= INTENT_FULL_CONFIDENCE_MIN else 0.40
+    yield make_observation(
+        ctx,
+        primitive="operational.objective",
+        value=value,
+        confidence=confidence,
+    )
--- a/decnet/profiler/behave_shell/_thresholds.py
+++ b/decnet/profiler/behave_shell/_thresholds.py
@@ -364,3 +364,53 @@ SHELL_MASTERY_MIN_COMMANDS: int = 5
 # used by Phase C primitives. ±10% of the boundary value drops
 # confidence by 0.20 per BEHAVE-EXTRACTOR.md §"Threshold proximity".
 SHELL_MASTERY_BOUNDARY_BAND: float = 0.10
+
+# ── operational.objective (Step G.1) ───────────────────────────────────────
+# Below this many *classified* commands, skip emission — too few hits
+# in any intent set to honestly call a session's objective.
+INTENT_MIN_COMMANDS: int = 3
+# At/above this many classified commands, raise confidence from 0.40 to
+# 0.60. Both v0.1 numbers; corpus tuning lands later.
+INTENT_FULL_CONFIDENCE_MIN: int = 6
+
+# ── operational.cleanup_behavior (Step G.3) ───────────────────────────────
+# NOTE(review): declared ahead of its feature function, which is not part
+# of this change.
+# Window of trailing commands inspected for cleanup-family hashes.
+CLEANUP_TAIL_K: int = 5
+# Distinct cleanup-family hashes in the tail required to call ``thorough``.
+CLEANUP_THOROUGH_MIN_DISTINCT: int = 3
+
+# ── operational.multi_actor_indicators (Step G.4) ─────────────────────────
+# NOTE(review): declared ahead of its feature function, which is not part
+# of this change.
+# Total command floor — below this we skip emission (no honest first-half
+# / second-half split).
+MULTI_ACTOR_MIN_COMMANDS: int = 8
+# Each half needs at least this many commands to compute a stable median.
+MULTI_ACTOR_HALF_MIN_COMMANDS: int = 4
+# Relative delta between the two halves' median IATs that flips the
+# verdict to ``handoff_detected``.
+MULTI_ACTOR_HANDOFF_DELTA: float = 0.50
+
+# ── emotional_valence.* hard confidence cap (Phase G) ─────────────────────
+# Registry-pinned ceiling for the four soft primitives. Enforced inside
+# each ``emotional_valence.*`` feature function (not in make_observation).
+EMOTIONAL_VALENCE_CONFIDENCE_CAP: float = 0.50
+
+# ── emotional_valence.valence (Step G.5) ──────────────────────────────────
+# NOTE(review): no consumer in this change; the meanings below are read
+# off the names only — confirm when Step G.5 lands.
+# Presumably the typed-character floor for emitting at all.
+VALENCE_MIN_TYPED_CHARS: int = 80
+# Presumably the typed-character count at which confidence stops being reduced.
+VALENCE_FULL_CONFIDENCE_MIN: int = 200
+# Presumably the minimum number of lexicon hits required.
+VALENCE_MIN_HITS: int = 2
+
+# ── emotional_valence.arousal (Step G.6) ──────────────────────────────────
+# NOTE(review): no consumer in this change. The ``_IAT_S`` suffix suggests
+# inter-arrival times in seconds — confirm when Step G.6 lands.
+AROUSAL_FAST_IAT_S: float = 0.06
+AROUSAL_CALM_IAT_S: float = 0.30
+AROUSAL_MIN_IATS: int = 30
+AROUSAL_CAPS_RUN_MIN: int = 5
+AROUSAL_BANG_RUN_MIN: int = 3
+
+# ── emotional_valence.stress_response (Step G.7) ──────────────────────────
+# NOTE(review): no consumer in this change. Both ratio floors currently
+# share the same value (1.20); which ratio each one gates is not shown here.
+STRESS_EUSTRESS_RATIO_MIN: float = 1.20
+STRESS_DISTRESS_RATIO_MIN: float = 1.20
+STRESS_MIN_ERRORED_WITH_IATS: int = 2
+
+# ── emotional_valence.frustration_venting (Step G.8) ──────────────────────
+# NOTE(review): no consumer in this change; presumably mirrors the
+# valence typed-character floors above — confirm when Step G.8 lands.
+FRUST_VENT_MIN_TYPED_CHARS: int = 30
+FRUST_VENT_FULL_CONFIDENCE_MIN: int = 200
--- a/tests/profiler/behave_shell/test_operational_objective.py
+++ b/tests/profiler/behave_shell/test_operational_objective.py
@@ -0,0 +1,116 @@
+"""Step G.1: ``operational.objective`` ∈ {recon, exfil, persistence,
+lateral, destructive}."""
+from __future__ import annotations
+
+from decnet.profiler.behave_shell import extract_session
+from decnet.profiler.behave_shell._parse import AsciinemaEvent
+
+
+PRIMITIVE = "operational.objective"
+
+
+def _of(observations: list, primitive: str):
+    obs = [o for o in observations if o.primitive == primitive]
+    assert len(obs) == 1, f"expected exactly one {primitive}, got {len(obs)}"
+    return obs[0]
+
+
+def _typed(text: str, t0: float = 0.0, dt: float = 0.05) -> list[AsciinemaEvent]:
+    return [(t0 + i * dt, "i", c) for i, c in enumerate(text)]
+
+
+def _cmd(token: str, t0: float, *, with_prompt: bool = True) -> list[AsciinemaEvent]:
+    events = _typed(f"{token}\r", t0=t0)
+    cmd_end = t0 + len(token) * 0.05
+    if with_prompt:
+        events.append((cmd_end + 0.10, "o", "out\nanti@host:~$ "))
+    else:
+        events.append((cmd_end + 0.10, "o", "out\n"))
+    return events
+
+
+def test_no_commands_no_emission() -> None:
+    out = list(extract_session([(0.0, "i", "x")], sid="g1-empty"))
+    assert [o for o in out if o.primitive == PRIMITIVE] == []
+
+
+def test_too_few_classified_skipped() -> None:
+    """Two recon commands < INTENT_MIN_COMMANDS=3 → no emission."""
+    events = _cmd("ls", t0=0.0) + _cmd("pwd", t0=1.0)
+    out = list(extract_session(events, sid="g1-thin"))
+    assert [o for o in out if o.primitive == PRIMITIVE] == []
+
+
+def test_unclassified_commands_skipped() -> None:
+    """``vim`` / ``foo`` / ``bar`` aren't in any intent set."""
+    events = (
+        _cmd("vim", t0=0.0)
+        + _cmd("foo", t0=1.0)
+        + _cmd("bar", t0=2.0)
+        + _cmd("baz", t0=3.0)
+    )
+    out = list(extract_session(events, sid="g1-unkn"))
+    assert [o for o in out if o.primitive == PRIMITIVE] == []
+
+
+def test_majority_recon_emits_recon() -> None:
+    events = (
+        _cmd("ls", t0=0.0)
+        + _cmd("pwd", t0=1.0)
+        + _cmd("whoami", t0=2.0)
+    )
+    obs = _of(list(extract_session(events, sid="g1-recon")), PRIMITIVE)
+    assert obs.value == "recon"
+    assert 0.39 < obs.confidence <= 0.60
+
+
+def test_majority_destructive_outranks_recon() -> None:
+    """Mixed: 3 destructive + 2 recon → destructive."""
+    events = (
+        _cmd("rm", t0=0.0)
+        + _cmd("ls", t0=1.0)
+        + _cmd("dd", t0=2.0)
+        + _cmd("pwd", t0=3.0)
+        + _cmd("shred", t0=4.0)
+    )
+    obs = _of(list(extract_session(events, sid="g1-dest")), PRIMITIVE)
+    assert obs.value == "destructive"
+
+
+def test_high_count_raises_confidence() -> None:
+    events: list[AsciinemaEvent] = []
+    for i, tok in enumerate(["ls", "pwd", "whoami", "id", "uname", "ps", "find"]):
+        events += _cmd(tok, t0=float(i))
+    obs = _of(list(extract_session(events, sid="g1-conf")), PRIMITIVE)
+    assert obs.value == "recon"
+    assert obs.confidence == 0.60
+
+
+def test_persistence_classifies() -> None:
+    events = (
+        _cmd("crontab", t0=0.0)
+        + _cmd("systemctl", t0=1.0)
+        + _cmd("passwd", t0=2.0)
+    )
+    obs = _of(list(extract_session(events, sid="g1-persist")), PRIMITIVE)
+    assert obs.value == "persistence"
+
+
+def test_exfil_classifies() -> None:
+    events = (
+        _cmd("curl", t0=0.0)
+        + _cmd("wget", t0=1.0)
+        + _cmd("scp", t0=2.0)
+    )
+    obs = _of(list(extract_session(events, sid="g1-exfil")), PRIMITIVE)
+    assert obs.value == "exfil"
+
+
+def test_lateral_classifies() -> None:
+    events = (
+        _cmd("ssh", t0=0.0)
+        + _cmd("kubectl", t0=1.0)
+        + _cmd("docker", t0=2.0)
+    )
+    obs = _of(list(extract_session(events, sid="g1-lat")), PRIMITIVE)
+    assert obs.value == "lateral"