feat(profiler/behave_shell): G.1 operational.objective

Per-command intent classification via the G.0 lexicon
(`destructive > persistence > exfil > lateral > recon` precedence);
majority vote across classified commands. Skip emission below
INTENT_MIN_COMMANDS=3 classified hits. Confidence 0.40 below
INTENT_FULL_CONFIDENCE_MIN=6, 0.60 at or above.
This commit is contained in:
2026-05-08 16:28:45 -04:00
parent 289a64014c
commit c11f3605be
4 changed files with 232 additions and 0 deletions

View File

@@ -31,6 +31,9 @@ from decnet.profiler.behave_shell._features.environmental import (
shell_type,
terminal_multiplexer,
)
from decnet.profiler.behave_shell._features.operational import (
objective,
)
from decnet.profiler.behave_shell._features.temporal import (
escalation_pattern,
exit_behavior,
@@ -81,4 +84,5 @@ FEATURES: tuple[FeatureFn, ...] = (
locale,
keyboard_layout,
numpad_usage,
objective,
)

View File

@@ -0,0 +1,62 @@
"""``operational.*`` feature functions (Phase G).
Step G.1: ``operational.objective``.
Step G.2: ``operational.opsec_discipline`` (lands later).
Step G.3: ``operational.cleanup_behavior`` (lands later).
Step G.4: ``operational.multi_actor_indicators`` (lands later).
"""
from __future__ import annotations
import collections
from typing import Iterator
from decnet_behave_core.spec.envelope import Observation
from decnet.profiler.behave_shell._ctx import SessionContext
from decnet.profiler.behave_shell._features._emit import make_observation
from decnet.profiler.behave_shell._intent import classify_intent
from decnet.profiler.behave_shell._thresholds import (
INTENT_FULL_CONFIDENCE_MIN,
INTENT_MIN_COMMANDS,
)
def objective(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``operational.objective`` ∈ {recon, exfil, persistence,
    lateral, destructive}.

    Walk every command's ``first_token_hash`` through
    :func:`classify_intent` and count the labels it returns. Commands
    that don't classify (``classify_intent`` returns ``None``) are
    skipped — the registry has no ``unknown`` value here.

    Skip emission when fewer than :data:`INTENT_MIN_COMMANDS` commands
    classified — too thin to call. Otherwise the label with the most
    hits wins. Ties are broken explicitly by the fixed precedence
    ``destructive > persistence > exfil > lateral > recon``.
    (``Counter.most_common`` alone would break ties by first-seen order
    in the session, which is *not* the precedence order.)

    Confidence: 0.40 below :data:`INTENT_FULL_CONFIDENCE_MIN`
    classified commands; 0.60 at or above. v0.1 lexicon — corpus tuning
    revisits in v0.2.

    Yields:
        At most one ``operational.objective`` observation.
    """
    if not ctx.commands:
        return

    # Tie-break order, highest priority first.
    # NOTE(review): duplicated from the precedence documented for
    # classify_intent — keep in sync with the G.0 lexicon.
    precedence = ("destructive", "persistence", "exfil", "lateral", "recon")

    counter: collections.Counter[str] = collections.Counter()
    for cmd in ctx.commands:
        label = classify_intent(cmd.first_token_hash)
        if label is not None:
            counter[label] += 1

    n_classified = sum(counter.values())
    if not counter or n_classified < INTENT_MIN_COMMANDS:
        return

    def _sort_key(label: str) -> tuple[int, int]:
        # Highest count first; among equal counts, earliest precedence
        # slot first. Labels outside the known set rank last.
        if label in precedence:
            rank = precedence.index(label)
        else:
            rank = len(precedence)
        return (-counter[label], rank)

    value = min(counter, key=_sort_key)
    confidence = 0.60 if n_classified >= INTENT_FULL_CONFIDENCE_MIN else 0.40
    yield make_observation(
        ctx,
        primitive="operational.objective",
        value=value,
        confidence=confidence,
    )

View File

@@ -364,3 +364,53 @@ SHELL_MASTERY_MIN_COMMANDS: int = 5
# used by Phase C primitives. ±10% of the boundary value drops
# confidence by 0.20 per BEHAVE-EXTRACTOR.md §"Threshold proximity".
SHELL_MASTERY_BOUNDARY_BAND: float = 0.10
# ── operational.objective (Step G.1) ───────────────────────────────────────
# Below this many *classified* commands, skip emission — too few hits
# in any intent set to honestly call a session's objective.
INTENT_MIN_COMMANDS: int = 3
# At/above this many classified commands, raise confidence from 0.40 to
# 0.60. Both v0.1 numbers; corpus tuning lands later.
INTENT_FULL_CONFIDENCE_MIN: int = 6
# ── operational.cleanup_behavior (Step G.3) ───────────────────────────────
# Window of trailing commands inspected for cleanup-family hashes.
CLEANUP_TAIL_K: int = 5
# Distinct cleanup-family hashes in the tail required to call ``thorough``.
CLEANUP_THOROUGH_MIN_DISTINCT: int = 3
# ── operational.multi_actor_indicators (Step G.4) ─────────────────────────
# Total command floor — below this we skip emission (no honest first-half
# / second-half split).
MULTI_ACTOR_MIN_COMMANDS: int = 8
# Each half needs at least this many commands to compute a stable median.
MULTI_ACTOR_HALF_MIN_COMMANDS: int = 4
# Relative delta between the two halves' median IATs that flips the
# verdict to ``handoff_detected``.
MULTI_ACTOR_HANDOFF_DELTA: float = 0.50
# ── emotional_valence.* hard confidence cap (Phase G) ─────────────────────
# Registry-pinned ceiling for the four soft primitives. Enforced inside
# each ``emotional_valence.*`` feature function (not in make_observation).
EMOTIONAL_VALENCE_CONFIDENCE_CAP: float = 0.50
# ── emotional_valence.valence (Step G.5) ──────────────────────────────────
# NOTE(review): undocumented here. Judging by the names these are
# presumably a typed-character floor for emission, a typed-character
# count for full confidence, and a minimum hit count — confirm against
# the G.5 feature function.
VALENCE_MIN_TYPED_CHARS: int = 80
VALENCE_FULL_CONFIDENCE_MIN: int = 200
VALENCE_MIN_HITS: int = 2
# ── emotional_valence.arousal (Step G.6) ──────────────────────────────────
# NOTE(review): undocumented here. The ``_IAT_S`` pair presumably bounds
# inter-arrival times in seconds (fast vs. calm typing) and
# ``AROUSAL_MIN_IATS`` a sample-size floor; the ``_RUN_MIN`` values
# presumably are minimum run lengths for caps / "!" bursts — confirm
# against the G.6 feature function.
AROUSAL_FAST_IAT_S: float = 0.06
AROUSAL_CALM_IAT_S: float = 0.30
AROUSAL_MIN_IATS: int = 30
AROUSAL_CAPS_RUN_MIN: int = 5
AROUSAL_BANG_RUN_MIN: int = 3
# ── emotional_valence.stress_response (Step G.7) ──────────────────────────
# NOTE(review): undocumented here. Both ratio floors are currently the
# same value (1.20) — confirm that is intentional and which ratio each
# one is compared against.
STRESS_EUSTRESS_RATIO_MIN: float = 1.20
STRESS_DISTRESS_RATIO_MIN: float = 1.20
STRESS_MIN_ERRORED_WITH_IATS: int = 2
# ── emotional_valence.frustration_venting (Step G.8) ──────────────────────
# NOTE(review): undocumented here. Presumably a typed-character floor for
# emission and a typed-character count for full confidence, mirroring the
# G.5 pair — confirm against the G.8 feature function.
FRUST_VENT_MIN_TYPED_CHARS: int = 30
FRUST_VENT_FULL_CONFIDENCE_MIN: int = 200

View File

@@ -0,0 +1,116 @@
"""Step G.1: ``operational.objective`` ∈ {recon, exfil, persistence,
lateral, destructive}."""
from __future__ import annotations
from decnet.profiler.behave_shell import extract_session
from decnet.profiler.behave_shell._parse import AsciinemaEvent
PRIMITIVE = "operational.objective"
def _of(observations: list, primitive: str):
obs = [o for o in observations if o.primitive == primitive]
assert len(obs) == 1, f"expected exactly one {primitive}, got {len(obs)}"
return obs[0]
def _typed(text: str, t0: float = 0.0, dt: float = 0.05) -> list[AsciinemaEvent]:
return [(t0 + i * dt, "i", c) for i, c in enumerate(text)]
def _cmd(token: str, t0: float, *, with_prompt: bool = True) -> list[AsciinemaEvent]:
    """Build the events for typing *token* + Enter, followed by output.

    The keystrokes come from ``_typed`` at its default 0.05 s spacing.
    A single ``"o"`` (output) event follows 0.10 s after the Enter
    keystroke; it ends with a shell prompt only when *with_prompt* is
    true.
    """
    keystrokes = _typed(f"{token}\r", t0=t0)
    # Timestamp of the trailing "\r": one 0.05 s step per token character.
    cmd_end = t0 + len(token) * 0.05
    output_text = "out\nanti@host:~$ " if with_prompt else "out\n"
    return [*keystrokes, (cmd_end + 0.10, "o", output_text)]
def test_no_commands_no_emission() -> None:
    """A session of one stray input keystroke emits no objective."""
    observations = extract_session([(0.0, "i", "x")], sid="g1-empty")
    assert not any(o.primitive == PRIMITIVE for o in observations)
def test_too_few_classified_skipped() -> None:
    """Two recon commands are below INTENT_MIN_COMMANDS=3 → no emission."""
    session = [*_cmd("ls", t0=0.0), *_cmd("pwd", t0=1.0)]
    observations = extract_session(session, sid="g1-thin")
    assert not any(o.primitive == PRIMITIVE for o in observations)
def test_unclassified_commands_skipped() -> None:
    """``vim`` / ``foo`` / ``bar`` / ``baz`` are expected to match no
    intent set, so four of them still emit nothing."""
    session = [
        *_cmd("vim", t0=0.0),
        *_cmd("foo", t0=1.0),
        *_cmd("bar", t0=2.0),
        *_cmd("baz", t0=3.0),
    ]
    observations = extract_session(session, sid="g1-unkn")
    assert not any(o.primitive == PRIMITIVE for o in observations)
def test_majority_recon_emits_recon() -> None:
    """Three commands expected to classify as recon emit ``recon`` with
    a confidence inside the (0.39, 0.60] band."""
    session = [
        *_cmd("ls", t0=0.0),
        *_cmd("pwd", t0=1.0),
        *_cmd("whoami", t0=2.0),
    ]
    observation = _of(list(extract_session(session, sid="g1-recon")), PRIMITIVE)
    assert observation.value == "recon"
    assert 0.39 < observation.confidence <= 0.60
def test_majority_destructive_outranks_recon() -> None:
    """Mixed session: 3 destructive + 2 recon, interleaved → destructive."""
    session = [
        *_cmd("rm", t0=0.0),
        *_cmd("ls", t0=1.0),
        *_cmd("dd", t0=2.0),
        *_cmd("pwd", t0=3.0),
        *_cmd("shred", t0=4.0),
    ]
    observation = _of(list(extract_session(session, sid="g1-dest")), PRIMITIVE)
    assert observation.value == "destructive"
def test_high_count_raises_confidence() -> None:
    """Seven commands expected to classify as recon reach the 0.60
    confidence tier."""
    tokens = ["ls", "pwd", "whoami", "id", "uname", "ps", "find"]
    # One command per second, flattened into a single event stream.
    session: list[AsciinemaEvent] = [
        event
        for index, token in enumerate(tokens)
        for event in _cmd(token, t0=float(index))
    ]
    observation = _of(list(extract_session(session, sid="g1-conf")), PRIMITIVE)
    assert observation.value == "recon"
    assert observation.confidence == 0.60
def test_persistence_classifies() -> None:
    """``crontab`` / ``systemctl`` / ``passwd`` together emit ``persistence``."""
    session = [
        *_cmd("crontab", t0=0.0),
        *_cmd("systemctl", t0=1.0),
        *_cmd("passwd", t0=2.0),
    ]
    observation = _of(list(extract_session(session, sid="g1-persist")), PRIMITIVE)
    assert observation.value == "persistence"
def test_exfil_classifies() -> None:
    """``curl`` / ``wget`` / ``scp`` together emit ``exfil``."""
    session = [
        *_cmd("curl", t0=0.0),
        *_cmd("wget", t0=1.0),
        *_cmd("scp", t0=2.0),
    ]
    observation = _of(list(extract_session(session, sid="g1-exfil")), PRIMITIVE)
    assert observation.value == "exfil"
def test_lateral_classifies() -> None:
    """``ssh`` / ``kubectl`` / ``docker`` together emit ``lateral``."""
    session = [
        *_cmd("ssh", t0=0.0),
        *_cmd("kubectl", t0=1.0),
        *_cmd("docker", t0=2.0),
    ]
    observation = _of(list(extract_session(session, sid="g1-lat")), PRIMITIVE)
    assert observation.value == "lateral"