feat(profiler/behave_shell): emit temporal.lifecycle_markers.landing_ritual

Inspect the first N commands; if at least K of their first_token_hashes match the recon-survey vocabulary (uname/id/whoami/pwd/hostname/w/who), emit present, else absent. Hashes precomputed at module load; PII-safe. v0.1 N=5, K=2.
2026-05-04 00:15:05 -04:00
parent d40495d71b
commit 1341df2705
4 changed files with 139 additions and 0 deletions
--- a/decnet/profiler/behave_shell/_features/__init__.py
+++ b/decnet/profiler/behave_shell/_features/__init__.py
@@ -26,6 +26,7 @@ from decnet.profiler.behave_shell._features.cognitive import (
 )
 from decnet.profiler.behave_shell._features.temporal import (
    escalation_pattern,
+    landing_ritual,
    session_duration,
 )
 from decnet.profiler.behave_shell._features.motor import (
@@ -65,4 +66,5 @@ FEATURES: tuple[FeatureFn, ...] = (
    error_resilience_fallback_to_man,
    session_duration,
    escalation_pattern,
+    landing_ritual,
 )
--- a/decnet/profiler/behave_shell/_features/temporal.py
+++ b/decnet/profiler/behave_shell/_features/temporal.py
@@ -7,6 +7,7 @@ and computed by the attribution engine, not the extractor.

 Step E.1: ``temporal.session_duration``.
 Step E.2: ``temporal.escalation_pattern``.
+Step E.3: ``temporal.lifecycle_markers.landing_ritual``.
 """
 from __future__ import annotations

@@ -18,6 +19,7 @@ from decnet_behave_core.spec.envelope import Observation

 from decnet.profiler.behave_shell._ctx import SessionContext
 from decnet.profiler.behave_shell._features._emit import make_observation
+from decnet.profiler.behave_shell._parse import hash_token
 from decnet.profiler.behave_shell._thresholds import (
    ESCALATION_BURSTY_CV,
    ESCALATION_BURSTY_ZERO_FRAC,
@@ -26,12 +28,29 @@ from decnet.profiler.behave_shell._thresholds import (
    ESCALATION_SUSTAINED_CV,
    ESCALATION_WINDOW_MIN_S,
    ESCALATION_WINDOW_TARGET,
+    LANDING_RITUAL_FIRST_N,
+    LANDING_RITUAL_HIT_MIN,
+    LANDING_RITUAL_MIN_COMMANDS,
    SESSION_DURATION_LONG_MAX,
    SESSION_DURATION_MEDIUM_MAX,
    SESSION_DURATION_SHORT_MAX,
 )


+# Precomputed at import time so the per-session check is a set lookup,
+# not 7 sha256 ops per session. The recon-survey vocabulary an attacker
+# (or scripted runner) typically opens with on a freshly-landed shell.
+_LANDING_RITUAL_HASHES: frozenset[str] = frozenset({  # same tokens are listed in landing_ritual's docstring — keep in sync
+    hash_token("uname"),
+    hash_token("id"),
+    hash_token("whoami"),
+    hash_token("pwd"),
+    hash_token("hostname"),
+    hash_token("w"),
+    hash_token("who"),
+})  # NOTE(review): assumes Command.first_token_hash is produced by this same hash_token — confirm in _parse
+
+
 def session_duration(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``temporal.session_duration`` ∈ {short, medium, long, marathon}.

@@ -113,3 +132,37 @@ def escalation_pattern(ctx: SessionContext) -> Iterator[Observation]:
        value=value,
        confidence=confidence,
    )
+
+
+def landing_ritual(ctx: SessionContext) -> Iterator[Observation]:
+    """Emit ``temporal.lifecycle_markers.landing_ritual`` ∈ {present, absent}.
+
+    Inspect the first ``LANDING_RITUAL_FIRST_N`` commands; if at least
+    ``LANDING_RITUAL_HIT_MIN`` of their first_token_hashes match the
+    recon-survey vocabulary set (``uname`` / ``id`` / ``whoami`` /
+    ``pwd`` / ``hostname`` / ``w`` / ``who``), the operator opened
+    with a landing ritual.
+
+    Skip emission when there are no commands at all — the registry's
+    binary doesn't admit ``unknown`` and emitting ``absent`` from
+    nothing would be dishonest. Below ``LANDING_RITUAL_MIN_COMMANDS``
+    we still emit, but at lower confidence — short sessions can still
+    show or fail to show a ritual.
+    """
+    n = len(ctx.commands)  # total commands in the session; also selects the confidence tier below
+    if n == 0:
+        return  # bare return in a generator: yields nothing, so no observation is emitted
+    head = ctx.commands[:LANDING_RITUAL_FIRST_N]  # slicing tolerates sessions shorter than FIRST_N
+    hits = sum(1 for c in head if c.first_token_hash in _LANDING_RITUAL_HASHES)  # per-command count: a repeated recon command counts each time
+    value = "present" if hits >= LANDING_RITUAL_HIT_MIN else "absent"  # threshold is inclusive
+
+    if n < LANDING_RITUAL_MIN_COMMANDS:  # keyed on total session length n, not on len(head)
+        confidence = 0.40
+    else:
+        confidence = 0.65
+    yield make_observation(
+        ctx,
+        primitive="temporal.lifecycle_markers.landing_ritual",
+        value=value,
+        confidence=confidence,
+    )
--- a/decnet/profiler/behave_shell/_thresholds.py
+++ b/decnet/profiler/behave_shell/_thresholds.py
@@ -210,6 +210,14 @@ ESCALATION_SUSTAINED_CV: float = 0.50
 ESCALATION_MIN_WINDOWS: int = 5
 ESCALATION_MIN_COMMANDS: int = 5

+# ── temporal.lifecycle_markers.landing_ritual (Step E.3) ──────────────────
+# How many of the first ``LANDING_RITUAL_FIRST_N`` commands must hit
+# the recon-token set (uname / id / whoami / pwd / hostname / w / who)
+# for the session to count as having a landing ritual.
+LANDING_RITUAL_FIRST_N: int = 5  # size of the leading window of commands that is inspected
+LANDING_RITUAL_HIT_MIN: int = 2  # inclusive: hits >= this within the window → "present"
+LANDING_RITUAL_MIN_COMMANDS: int = 3  # sessions with fewer total commands still emit, at reduced confidence
+
 # ── motor.keystroke_cadence (Step B.1) ──────────────────────────────────────
 # Typing bursts split at gaps > IKI_THINK_MAX_S so think-pauses between
 # commands don't inflate the within-burst CV. Mirrors the prototype's
--- a/tests/profiler/behave_shell/test_temporal_landing_ritual.py
+++ b/tests/profiler/behave_shell/test_temporal_landing_ritual.py
@@ -0,0 +1,76 @@
+"""Step E.3: ``temporal.lifecycle_markers.landing_ritual``."""
+from __future__ import annotations
+
+from decnet.profiler.behave_shell import extract_session
+from decnet.profiler.behave_shell._parse import AsciinemaEvent
+
+
+PRIMITIVE = "temporal.lifecycle_markers.landing_ritual"
+
+
+def _of(observations: list, primitive: str):  # return the single observation for `primitive`; asserts exactly one exists
+    obs = [o for o in observations if o.primitive == primitive]
+    assert len(obs) == 1, f"expected exactly one {primitive}, got {len(obs)}"
+    return obs[0]
+
+
+def _cmds(tokens: list[str]) -> list[AsciinemaEvent]:  # build synthetic keystroke events that type each token in turn
+    events: list[AsciinemaEvent] = []
+    for i, tok in enumerate(tokens):
+        t0 = i * 1.0  # token i starts at t = i seconds
+        for j, c in enumerate(tok):
+            events.append((t0 + j * 0.05, "i", c))  # one event per character, 0.05 s apart; "i" presumably marks input — confirm in _parse
+        events.append((t0 + len(tok) * 0.05, "i", "\r"))  # trailing "\r" — presumably what the parser treats as end of command
+    return events
+
+
+def test_no_commands_no_emission() -> None:  # a session with no commands must emit nothing for this primitive
+    out = list(extract_session([(0.0, "i", "a")], sid="lr-empty"))  # one keystroke with no "\r" — expected to parse to zero commands
+    assert [o for o in out if o.primitive == PRIMITIVE] == []
+
+
+def test_recon_survey_emits_present() -> None:
+    """First commands are uname/id/whoami → present."""
+    out = list(extract_session(
+        _cmds(["uname", "id", "whoami", "ls", "ps", "cat"]),  # 3 recon tokens among the first 5 commands
+        sid="lr-present",
+    ))
+    obs = _of(out, PRIMITIVE)
+    assert obs.value == "present"
+
+
+def test_single_recon_token_below_threshold_emits_absent() -> None:
+    """One recon token in first-5 isn't enough (need ≥2) → absent."""
+    out = list(extract_session(
+        _cmds(["uname", "vim", "edit", "save", "exit", "ls"]),  # only "uname" is in the recon vocabulary
+        sid="lr-onehit",
+    ))
+    obs = _of(out, PRIMITIVE)
+    assert obs.value == "absent"
+
+
+def test_no_recon_tokens_emits_absent() -> None:  # zero recon tokens anywhere → "absent", but still exactly one observation
+    out = list(extract_session(
+        _cmds(["vim", "edit", "save", "make", "ls", "cat"]),  # none of these are in the recon vocabulary
+        sid="lr-absent",
+    ))
+    obs = _of(out, PRIMITIVE)
+    assert obs.value == "absent"
+
+
+def test_recon_after_first_n_does_not_count() -> None:
+    """Only the first N=5 commands are considered."""
+    out = list(extract_session(
+        _cmds(["vim", "edit", "save", "make", "test", "uname", "id", "whoami"]),  # recon tokens sit at positions 6-8, outside the window
+        sid="lr-late",
+    ))
+    obs = _of(out, PRIMITIVE)
+    assert obs.value == "absent"
+
+
+def test_short_session_low_confidence() -> None:  # fewer total commands must yield strictly lower confidence
+    short = list(extract_session(_cmds(["uname", "id"]), sid="lr-short"))  # 2 commands: below the minimum-commands threshold
+    full = list(extract_session(_cmds(["uname", "id", "whoami", "ls", "ps"]), sid="lr-full"))  # 5 commands: at or above the threshold
+    s = _of(short, PRIMITIVE)
+    f = _of(full, PRIMITIVE)
+    assert s.confidence < f.confidence  # only the ordering is pinned, not the exact confidence values