"""``temporal.*`` feature functions — per-session subset. Phase E ships the four ``temporal.*`` primitives that don't need observation history. The other three (``session_timing``, ``persistence``, ``lifecycle_markers.idle_periodicity``) are Tier B and computed by the attribution engine, not the extractor. Step E.1: ``temporal.session_duration``. Step E.2: ``temporal.escalation_pattern``. Step E.3: ``temporal.lifecycle_markers.landing_ritual``. Step E.4: ``temporal.lifecycle_markers.exit_behavior`` (unblocked by F.0). """ from __future__ import annotations import math import statistics from typing import Iterator from behave_core.spec.envelope import Observation from decnet.profiler.behave_shell._ctx import SessionContext from decnet.profiler.behave_shell._features._emit import make_observation from decnet.profiler.behave_shell._parse import hash_token from decnet.profiler.behave_shell._thresholds import ( ESCALATION_BURSTY_CV, ESCALATION_BURSTY_ZERO_FRAC, ESCALATION_MIN_COMMANDS, ESCALATION_MIN_WINDOWS, ESCALATION_SUSTAINED_CV, ESCALATION_WINDOW_MIN_S, ESCALATION_WINDOW_TARGET, EXIT_BEHAVIOR_LOOKBACK_K, LANDING_RITUAL_FIRST_N, LANDING_RITUAL_HIT_MIN, LANDING_RITUAL_MIN_COMMANDS, SESSION_DURATION_LONG_MAX, SESSION_DURATION_MEDIUM_MAX, SESSION_DURATION_SHORT_MAX, ) # Precomputed at import time. ``graceful`` is operator-typed shutdown; # ``cleanup`` is the wipe-tracks vocabulary. Both expand to v0.2 once # the corpus shows what gets missed. _GRACEFUL_EXIT_HASHES: frozenset[str] = frozenset({ hash_token("exit"), hash_token("logout"), hash_token("quit"), hash_token("logoff"), }) _CLEANUP_TOKEN_HASHES: frozenset[str] = frozenset({ hash_token("history"), hash_token("unset"), hash_token("rm"), hash_token("shred"), hash_token("clear"), hash_token("kill"), }) # Precomputed at import time so the per-session check is a set lookup, # not 7 sha256 ops per session. The recon-survey vocabulary an attacker # (or scripted runner) typically opens with on a freshly-landed shell. _LANDING_RITUAL_HASHES: frozenset[str] = frozenset({ hash_token("uname"), hash_token("id"), hash_token("whoami"), hash_token("pwd"), hash_token("hostname"), hash_token("w"), hash_token("who"), }) def session_duration(ctx: SessionContext) -> Iterator[Observation]: """Emit ``temporal.session_duration`` ∈ {short, medium, long, marathon}. Direct measurement off ``ctx.duration_s``. Skip emission only when the session has neither commands nor any duration to speak of — a one-event session with ``duration_s == 0`` and no commands has nothing honest to bucket. Confidence is high — duration is a fact, not an inference. """ if ctx.duration_s <= 0.0 and not ctx.commands: return d = ctx.duration_s if d < SESSION_DURATION_SHORT_MAX: value = "short" elif d < SESSION_DURATION_MEDIUM_MAX: value = "medium" elif d < SESSION_DURATION_LONG_MAX: value = "long" else: value = "marathon" yield make_observation( ctx, primitive="temporal.session_duration", value=value, confidence=0.85, ) def escalation_pattern(ctx: SessionContext) -> Iterator[Observation]: """Emit ``temporal.escalation_pattern`` ∈ {sustained, erratic, bursty}. Bin commands into non-overlapping windows of width ``max(ESCALATION_WINDOW_MIN_S, duration_s / ESCALATION_WINDOW_TARGET)``. Compute the CV of per-window command counts and the fraction of zero-count windows. * **bursty** — significant silence (zero_frac ≥ threshold) AND high dispersion (CV ≥ threshold). Real spikes against a quiet background. * **sustained** — low dispersion (CV < threshold). Steady cadence. * **erratic** — fall-through. Variable but no clear silence pattern. Skip emission when the session is too short to bin meaningfully (no commands, or duration too small to produce any window). """ n_cmds = len(ctx.commands) if n_cmds == 0 or ctx.duration_s <= 0.0: return width = max(ESCALATION_WINDOW_MIN_S, ctx.duration_s / ESCALATION_WINDOW_TARGET) n_windows = max(1, math.ceil(ctx.duration_s / width)) counts = [0] * n_windows for cmd in ctx.commands: offset = cmd.start_ts - ctx.t_start idx = min(n_windows - 1, max(0, int(offset / width))) counts[idx] += 1 mean = statistics.fmean(counts) if mean <= 0.0 or len(counts) < 2: cv = 0.0 else: cv = statistics.stdev(counts) / mean zero_frac = sum(1 for c in counts if c == 0) / len(counts) if zero_frac >= ESCALATION_BURSTY_ZERO_FRAC and cv >= ESCALATION_BURSTY_CV: value = "bursty" elif cv < ESCALATION_SUSTAINED_CV: value = "sustained" else: value = "erratic" if n_windows < ESCALATION_MIN_WINDOWS or n_cmds < ESCALATION_MIN_COMMANDS: confidence = 0.40 else: confidence = 0.60 yield make_observation( ctx, primitive="temporal.escalation_pattern", value=value, confidence=confidence, ) def landing_ritual(ctx: SessionContext) -> Iterator[Observation]: """Emit ``temporal.lifecycle_markers.landing_ritual`` ∈ {present, absent}. Inspect the first ``LANDING_RITUAL_FIRST_N`` commands; if at least ``LANDING_RITUAL_HIT_MIN`` of their first_token_hashes match the recon-survey vocabulary set (``uname`` / ``id`` / ``whoami`` / ``pwd`` / ``hostname`` / ``w`` / ``who``), the operator opened with a landing ritual. Skip emission when there are no commands at all — the registry's binary doesn't admit ``unknown`` and emitting ``absent`` from nothing would be dishonest. Below ``LANDING_RITUAL_MIN_COMMANDS`` we still emit, but at lower confidence — short sessions can still show or fail to show a ritual. """ n = len(ctx.commands) if n == 0: return head = ctx.commands[:LANDING_RITUAL_FIRST_N] hits = sum(1 for c in head if c.first_token_hash in _LANDING_RITUAL_HASHES) value = "present" if hits >= LANDING_RITUAL_HIT_MIN else "absent" if n < LANDING_RITUAL_MIN_COMMANDS: confidence = 0.40 else: confidence = 0.65 yield make_observation( ctx, primitive="temporal.lifecycle_markers.landing_ritual", value=value, confidence=confidence, ) def exit_behavior(ctx: SessionContext) -> Iterator[Observation]: """Emit ``temporal.lifecycle_markers.exit_behavior`` ∈ {graceful, abrupt, cleanup}. Resolution of the E.4 hold from Phase E. Now that F.0's ``Command.followed_by_prompt`` gives us prompt-after-last-command visibility — the exit-code proxy we couldn't get in Phase E: 1. Last command **lacks** a trailing prompt → ``abrupt`` (session cut mid-output, custom PS1 swallowing, or genuinely interrupted). 2. Last command's first_token_hash ∈ ``_GRACEFUL_EXIT_HASHES`` (``exit`` / ``logout`` / ``quit`` / ``logoff``) → ``graceful``. 3. Any of the last ``EXIT_BEHAVIOR_LOOKBACK_K`` (3) commands' first_token_hash ∈ ``_CLEANUP_TOKEN_HASHES`` (``history`` / ``unset`` / ``rm`` / ``shred`` / ``clear`` / ``kill``) → ``cleanup``. 4. Else → ``graceful`` (clean Ctrl-D / window close). Skip emission when no commands. Confidence 0.65 when the trailing prompt is clear; 0.45 for ``abrupt`` (a custom PS1 suppressing prompt echo could also yield ``followed_by_prompt=False``). """ if not ctx.commands: return last = ctx.commands[-1] if not last.followed_by_prompt: value = "abrupt" confidence = 0.45 elif last.first_token_hash in _GRACEFUL_EXIT_HASHES: value = "graceful" confidence = 0.65 else: tail = ctx.commands[-EXIT_BEHAVIOR_LOOKBACK_K:] if any(c.first_token_hash in _CLEANUP_TOKEN_HASHES for c in tail): value = "cleanup" confidence = 0.65 else: value = "graceful" confidence = 0.65 yield make_observation( ctx, primitive="temporal.lifecycle_markers.exit_behavior", value=value, confidence=confidence, )