Six-part fix for the collection bottleneck that was blocking the dev loop: 1. Lazy mitreattack.stix20 import in attack_stix.py — deferred to first _load() call (TYPE_CHECKING guard at top level) 2. Lazy misp_stix_converter import in both MISP export routers — moved from module level into the route handler body 3. Lazy attack_catalog / attack_stix in ttp.py repo mixin — thin wrapper functions so the import chain never fires at module load time 4. tests/api/conftest.py — `from decnet.web.api import app` moved inside the `client()` fixture; `pytest_ignore_collect` broadened to skip all test_schemathesis*.py variants (not just test_schemathesis.py), which were launching a subprocess server at module-import time 5. pyproject.toml — `norecursedirs` for tests/live, tests/stress, tests/service_testing, tests/docker, tests/perf so these directories are never entered; `-m` filter removed from addopts (now redundant); `--dist loadscope` → `--dist load` to unblock workers immediately 6. behave_core / behave_shell rename — BEHAVE packages dropped the `decnet_` prefix; reinstalled editable installs and updated all 14 import sites across profiler, ttp, bus, and correlation modules
238 lines
8.1 KiB
Python
238 lines
8.1 KiB
Python
"""``temporal.*`` feature functions — per-session subset.
|
|
|
|
Phase E ships the four ``temporal.*`` primitives that don't need
|
|
observation history. The other three (``session_timing``,
|
|
``persistence``, ``lifecycle_markers.idle_periodicity``) are Tier B
|
|
and computed by the attribution engine, not the extractor.
|
|
|
|
Step E.1: ``temporal.session_duration``.
|
|
Step E.2: ``temporal.escalation_pattern``.
|
|
Step E.3: ``temporal.lifecycle_markers.landing_ritual``.
|
|
Step E.4: ``temporal.lifecycle_markers.exit_behavior`` (unblocked by F.0).
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import math
|
|
import statistics
|
|
from typing import Iterator
|
|
|
|
from behave_core.spec.envelope import Observation
|
|
|
|
from decnet.profiler.behave_shell._ctx import SessionContext
|
|
from decnet.profiler.behave_shell._features._emit import make_observation
|
|
from decnet.profiler.behave_shell._parse import hash_token
|
|
from decnet.profiler.behave_shell._thresholds import (
|
|
ESCALATION_BURSTY_CV,
|
|
ESCALATION_BURSTY_ZERO_FRAC,
|
|
ESCALATION_MIN_COMMANDS,
|
|
ESCALATION_MIN_WINDOWS,
|
|
ESCALATION_SUSTAINED_CV,
|
|
ESCALATION_WINDOW_MIN_S,
|
|
ESCALATION_WINDOW_TARGET,
|
|
EXIT_BEHAVIOR_LOOKBACK_K,
|
|
LANDING_RITUAL_FIRST_N,
|
|
LANDING_RITUAL_HIT_MIN,
|
|
LANDING_RITUAL_MIN_COMMANDS,
|
|
SESSION_DURATION_LONG_MAX,
|
|
SESSION_DURATION_MEDIUM_MAX,
|
|
SESSION_DURATION_SHORT_MAX,
|
|
)
|
|
|
|
|
|
# Hashed once, when the module is imported. The ``graceful`` set holds
# the commands an operator types to shut a session down; the ``cleanup``
# set (defined right after) is the wipe-tracks vocabulary. Both are
# expected to grow in v0.2 once the corpus shows what gets missed.
_GRACEFUL_EXIT_HASHES: frozenset[str] = frozenset(
    hash_token(word) for word in ("exit", "logout", "quit", "logoff")
)
|
|
# Wipe-tracks vocabulary: first tokens whose presence among a session's
# final commands makes ``exit_behavior`` report ``cleanup``. Hashed once
# at import so the per-session check is a plain set lookup.
_CLEANUP_TOKEN_HASHES: frozenset[str] = frozenset(
    hash_token(word)
    for word in ("history", "unset", "rm", "shred", "clear", "kill")
)
|
|
|
|
|
|
# Built once at import so each session pays only for set lookups rather
# than re-hashing seven tokens. This is the recon-survey vocabulary an
# attacker (or scripted runner) typically opens with on a freshly-landed
# shell.
_LANDING_RITUAL_HASHES: frozenset[str] = frozenset(
    hash_token(word)
    for word in ("uname", "id", "whoami", "pwd", "hostname", "w", "who")
)
|
|
|
|
|
|
def session_duration(ctx: SessionContext) -> Iterator[Observation]:
    """Yield one ``temporal.session_duration`` observation for the session.

    The value is one of ``short`` / ``medium`` / ``long`` / ``marathon``,
    chosen by comparing ``ctx.duration_s`` against the three
    ``SESSION_DURATION_*_MAX`` upper bounds in ascending order; anything
    not below the last bound is ``marathon``.

    Nothing is yielded when the session has no commands *and* a
    non-positive duration — there is nothing honest to bucket. A session
    with commands but zero duration is still bucketed.

    Confidence is a fixed 0.85: the duration is measured, not inferred.
    """
    elapsed = ctx.duration_s
    if elapsed <= 0.0 and not ctx.commands:
        return

    # Exclusive upper bounds, ascending; first bound the duration falls
    # under decides the label.
    ceilings = (
        (SESSION_DURATION_SHORT_MAX, "short"),
        (SESSION_DURATION_MEDIUM_MAX, "medium"),
        (SESSION_DURATION_LONG_MAX, "long"),
    )
    bucket = next(
        (label for ceiling, label in ceilings if elapsed < ceiling),
        "marathon",
    )

    yield make_observation(
        ctx,
        primitive="temporal.session_duration",
        value=bucket,
        confidence=0.85,
    )
|
|
|
|
|
|
def escalation_pattern(ctx: SessionContext) -> Iterator[Observation]:
    """Yield one ``temporal.escalation_pattern`` observation.

    Commands are counted into consecutive, non-overlapping time windows
    of width ``max(ESCALATION_WINDOW_MIN_S, duration_s /
    ESCALATION_WINDOW_TARGET)``. Two statistics over the per-window
    counts drive the label:

    * the coefficient of variation (sample stdev / mean), taken as 0.0
      when there is only one window or the mean is not positive;
    * the fraction of windows that saw no command at all.

    Labels:

    * ``bursty`` — empty-window fraction ≥ ``ESCALATION_BURSTY_ZERO_FRAC``
      and CV ≥ ``ESCALATION_BURSTY_CV``.
    * ``sustained`` — otherwise, CV < ``ESCALATION_SUSTAINED_CV``.
    * ``erratic`` — everything else.

    Nothing is yielded for a session with no commands or a non-positive
    duration. Confidence is 0.60, dropping to 0.40 when the session has
    fewer than ``ESCALATION_MIN_WINDOWS`` windows or fewer than
    ``ESCALATION_MIN_COMMANDS`` commands.
    """
    total_cmds = len(ctx.commands)
    if total_cmds == 0 or ctx.duration_s <= 0.0:
        return

    bin_width = max(
        ESCALATION_WINDOW_MIN_S,
        ctx.duration_s / ESCALATION_WINDOW_TARGET,
    )
    bin_count = max(1, math.ceil(ctx.duration_s / bin_width))
    last_bin = bin_count - 1

    histogram = [0] * bin_count
    for command in ctx.commands:
        elapsed = command.start_ts - ctx.t_start
        # Clamp so timestamps before t_start or exactly at the session
        # end still land in a valid window.
        slot = min(last_bin, max(0, int(elapsed / bin_width)))
        histogram[slot] += 1

    avg = statistics.fmean(histogram)
    dispersion = 0.0
    if avg > 0.0 and len(histogram) >= 2:
        # Sample stdev needs at least two windows.
        dispersion = statistics.stdev(histogram) / avg
    empty_share = histogram.count(0) / len(histogram)

    if (
        empty_share >= ESCALATION_BURSTY_ZERO_FRAC
        and dispersion >= ESCALATION_BURSTY_CV
    ):
        label = "bursty"
    elif dispersion < ESCALATION_SUSTAINED_CV:
        label = "sustained"
    else:
        label = "erratic"

    enough_data = (
        bin_count >= ESCALATION_MIN_WINDOWS
        and total_cmds >= ESCALATION_MIN_COMMANDS
    )
    yield make_observation(
        ctx,
        primitive="temporal.escalation_pattern",
        value=label,
        confidence=0.60 if enough_data else 0.40,
    )
|
|
|
|
|
|
def landing_ritual(ctx: SessionContext) -> Iterator[Observation]:
    """Yield one ``temporal.lifecycle_markers.landing_ritual`` observation.

    Looks at the first ``LANDING_RITUAL_FIRST_N`` commands and counts
    how many have a ``first_token_hash`` in the recon-survey set
    (``uname`` / ``id`` / ``whoami`` / ``pwd`` / ``hostname`` / ``w`` /
    ``who``). ``LANDING_RITUAL_HIT_MIN`` or more matches gives
    ``present``; fewer gives ``absent``.

    Nothing is yielded for a session without commands: the value is
    binary, and reporting ``absent`` from no evidence would be
    dishonest. Sessions shorter than ``LANDING_RITUAL_MIN_COMMANDS``
    still emit, at confidence 0.40 instead of 0.65.
    """
    total = len(ctx.commands)
    if total == 0:
        return

    opening = ctx.commands[:LANDING_RITUAL_FIRST_N]
    matched = 0
    for command in opening:
        if command.first_token_hash in _LANDING_RITUAL_HASHES:
            matched += 1

    if matched >= LANDING_RITUAL_HIT_MIN:
        label = "present"
    else:
        label = "absent"

    yield make_observation(
        ctx,
        primitive="temporal.lifecycle_markers.landing_ritual",
        value=label,
        confidence=0.40 if total < LANDING_RITUAL_MIN_COMMANDS else 0.65,
    )
|
|
|
|
|
|
def exit_behavior(ctx: SessionContext) -> Iterator[Observation]:
    """Yield one ``temporal.lifecycle_markers.exit_behavior`` observation.

    The value is ``graceful``, ``abrupt`` or ``cleanup``, decided from
    the last command's ``followed_by_prompt`` flag and from the
    ``first_token_hash`` of the trailing commands. Rules are applied in
    order; the first match wins:

    1. The last command has no trailing prompt → ``abrupt`` (session
       cut mid-output, a custom PS1 swallowing the prompt, or a genuine
       interruption).
    2. The last command's first token is one of ``exit`` / ``logout`` /
       ``quit`` / ``logoff`` → ``graceful``.
    3. Any of the last ``EXIT_BEHAVIOR_LOOKBACK_K`` commands has a first
       token in the cleanup set (``history`` / ``unset`` / ``rm`` /
       ``shred`` / ``clear`` / ``kill``) → ``cleanup``.
    4. Otherwise → ``graceful`` (clean Ctrl-D / window close).

    Nothing is yielded for a session without commands.

    Confidence is 0.65 whenever the trailing prompt was seen, and 0.45
    for ``abrupt``, since a custom PS1 that suppresses prompt echo could
    also produce ``followed_by_prompt=False``.
    """
    if not ctx.commands:
        return

    final = ctx.commands[-1]
    if not final.followed_by_prompt:
        label, certainty = "abrupt", 0.45
    elif final.first_token_hash in _GRACEFUL_EXIT_HASHES:
        label, certainty = "graceful", 0.65
    else:
        # Prompt seen but no explicit exit word: inspect the closing
        # commands for track-wiping before defaulting to graceful.
        closing = ctx.commands[-EXIT_BEHAVIOR_LOOKBACK_K:]
        wiped_tracks = any(
            command.first_token_hash in _CLEANUP_TOKEN_HASHES
            for command in closing
        )
        label = "cleanup" if wiped_tracks else "graceful"
        certainty = 0.65

    yield make_observation(
        ctx,
        primitive="temporal.lifecycle_markers.exit_behavior",
        value=label,
        confidence=certainty,
    )
|