feat(profiler/behave_shell): F.0 prompt-line detector
Adds PromptLine dataclass + extract_prompt_lines() helper. PromptLine carries ts, suffix_char ($/#/%/>), raw_line (ANSI-stripped, capped), is_root flag. Populated during the existing single-pass output-window walk; SessionContext gains prompt_lines, Command gains followed_by_prompt. PII trade-off (ANTI-authorised at Phase F): PS1 text retained on ctx so F.1 / F.3 / E.4 can read it. Capped at PROMPT_LINE_MAX_CHARS=256. Observations still only carry derived primitive values. D.0's regex error helpers stay alongside (NOT subsumed) — they fire even when PS1 echo is suppressed. F.0 enriches D.0 rather than replacing it.
This commit is contained in:
@@ -18,7 +18,9 @@ from decnet.profiler.behave_shell._parse import (
|
|||||||
AsciinemaEvent,
|
AsciinemaEvent,
|
||||||
Command,
|
Command,
|
||||||
PasteBurst,
|
PasteBurst,
|
||||||
|
PromptLine,
|
||||||
detect_error_in_output,
|
detect_error_in_output,
|
||||||
|
extract_prompt_lines,
|
||||||
hash_token,
|
hash_token,
|
||||||
strip_ansi,
|
strip_ansi,
|
||||||
)
|
)
|
||||||
@@ -26,6 +28,7 @@ from decnet.profiler.behave_shell._thresholds import (
|
|||||||
IKI_THINK_MAX_S,
|
IKI_THINK_MAX_S,
|
||||||
PASTE_BURST_MAX_IAT_S,
|
PASTE_BURST_MAX_IAT_S,
|
||||||
PASTE_MIN_CHARS_PER_EVENT,
|
PASTE_MIN_CHARS_PER_EVENT,
|
||||||
|
PROMPT_LINE_MAX_CHARS,
|
||||||
SHORTCUT_CTRL_BYTES,
|
SHORTCUT_CTRL_BYTES,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -63,6 +66,9 @@ class SessionContext:
|
|||||||
# Step B.4 derivations — per-command intra-typing IATs
|
# Step B.4 derivations — per-command intra-typing IATs
|
||||||
intra_command_iats: tuple[tuple[float, ...], ...] = field(default_factory=tuple)
|
intra_command_iats: tuple[tuple[float, ...], ...] = field(default_factory=tuple)
|
||||||
|
|
||||||
|
# Step F.0 derivations — PS1 prompt lines detected in the output stream
|
||||||
|
prompt_lines: tuple[PromptLine, ...] = field(default_factory=tuple)
|
||||||
|
|
||||||
|
|
||||||
def _detect_paste_bursts(
|
def _detect_paste_bursts(
|
||||||
inputs: list[AsciinemaEvent],
|
inputs: list[AsciinemaEvent],
|
||||||
@@ -225,8 +231,14 @@ def _segment_commands(inputs: list[AsciinemaEvent]) -> tuple[Command, ...]:
|
|||||||
def _annotate_commands_with_output(
|
def _annotate_commands_with_output(
|
||||||
commands: tuple[Command, ...],
|
commands: tuple[Command, ...],
|
||||||
outputs: list[AsciinemaEvent],
|
outputs: list[AsciinemaEvent],
|
||||||
) -> tuple[Command, ...]:
|
) -> tuple[tuple[Command, ...], tuple[PromptLine, ...]]:
|
||||||
"""Re-emit ``commands`` with ``errored`` / ``output_bytes`` filled.
|
"""Re-emit ``commands`` with output-derived fields filled.
|
||||||
|
|
||||||
|
Returns ``(commands, prompt_lines)``. Each ``Command`` gains
|
||||||
|
``errored``, ``output_bytes``, and ``followed_by_prompt`` (Step
|
||||||
|
F.0). The flattened tuple of all detected ``PromptLine`` instances
|
||||||
|
across every command's window is returned alongside for the caller
|
||||||
|
to install on ``SessionContext.prompt_lines``.
|
||||||
|
|
||||||
The output window for ``commands[i]`` spans from its ``end_ts``
|
The output window for ``commands[i]`` spans from its ``end_ts``
|
||||||
(the ``\\r``/``\\n`` that ran it) to the ``start_ts`` of the next
|
(the ``\\r``/``\\n`` that ran it) to the ``start_ts`` of the next
|
||||||
@@ -234,11 +246,13 @@ def _annotate_commands_with_output(
|
|||||||
so output events arriving at or after ``t_end`` are still captured.
|
so output events arriving at or after ``t_end`` are still captured.
|
||||||
"""
|
"""
|
||||||
if not commands:
|
if not commands:
|
||||||
return commands
|
return commands, ()
|
||||||
annotated: list[Command] = []
|
annotated: list[Command] = []
|
||||||
|
all_prompts: list[PromptLine] = []
|
||||||
for i, cmd in enumerate(commands):
|
for i, cmd in enumerate(commands):
|
||||||
win_end = commands[i + 1].start_ts if i + 1 < len(commands) else math.inf
|
win_end = commands[i + 1].start_ts if i + 1 < len(commands) else math.inf
|
||||||
byte_count, errored = _output_window(outputs, cmd.end_ts, win_end)
|
byte_count, errored, prompts = _output_window(outputs, cmd.end_ts, win_end)
|
||||||
|
all_prompts.extend(prompts)
|
||||||
annotated.append(Command(
|
annotated.append(Command(
|
||||||
start_ts=cmd.start_ts,
|
start_ts=cmd.start_ts,
|
||||||
end_ts=cmd.end_ts,
|
end_ts=cmd.end_ts,
|
||||||
@@ -248,8 +262,9 @@ def _annotate_commands_with_output(
|
|||||||
pipe_count=cmd.pipe_count,
|
pipe_count=cmd.pipe_count,
|
||||||
errored=errored,
|
errored=errored,
|
||||||
output_bytes=byte_count,
|
output_bytes=byte_count,
|
||||||
|
followed_by_prompt=bool(prompts),
|
||||||
))
|
))
|
||||||
return tuple(annotated)
|
return tuple(annotated), tuple(all_prompts)
|
||||||
|
|
||||||
|
|
||||||
def _per_command_iats(
|
def _per_command_iats(
|
||||||
@@ -289,26 +304,37 @@ def _output_window(
|
|||||||
outputs: list[AsciinemaEvent],
|
outputs: list[AsciinemaEvent],
|
||||||
start: float,
|
start: float,
|
||||||
end: float,
|
end: float,
|
||||||
) -> tuple[int, bool]:
|
) -> tuple[int, bool, tuple[PromptLine, ...]]:
|
||||||
"""Walk output events in ``[start, end)`` once.
|
"""Walk output events in ``[start, end)`` once.
|
||||||
|
|
||||||
Returns ``(byte_count, errored)``. ``byte_count`` is the raw byte
|
Returns ``(byte_count, errored, prompt_lines)``. ``byte_count`` is
|
||||||
count (pre-strip); ``errored`` is the canonical-error-pattern match
|
the raw byte count (pre-strip); ``errored`` is the canonical-error
|
||||||
over the ANSI-stripped concatenation. The stripped text is dropped
|
-pattern match over the ANSI-stripped concatenation;
|
||||||
on return — PII discipline: only an int and a bool leave this
|
``prompt_lines`` is the tuple of PS1 lines detected in the same
|
||||||
helper. The full output bytes never enter ``Command`` or the
|
stripped text (Step F.0).
|
||||||
``SessionContext``.
|
|
||||||
|
PII trade-off (Phase F): the stripped text itself is dropped on
|
||||||
|
return, but ``prompt_lines`` retains PS1 strings (capped at
|
||||||
|
``PROMPT_LINE_MAX_CHARS``). Only derived values leave the engine
|
||||||
|
via observations; the prompt strings live on ``SessionContext``
|
||||||
|
so F.1 / F.3 / E.4 can read them.
|
||||||
"""
|
"""
|
||||||
chunks: list[str] = []
|
chunks: list[str] = []
|
||||||
|
last_ts = start
|
||||||
byte_count = 0
|
byte_count = 0
|
||||||
for t, _k, d in outputs:
|
for t, _k, d in outputs:
|
||||||
if start <= t < end:
|
if start <= t < end:
|
||||||
byte_count += len(d)
|
byte_count += len(d)
|
||||||
chunks.append(d)
|
chunks.append(d)
|
||||||
|
last_ts = t
|
||||||
if not chunks:
|
if not chunks:
|
||||||
return 0, False
|
return 0, False, ()
|
||||||
stripped = strip_ansi("".join(chunks))
|
stripped = strip_ansi("".join(chunks))
|
||||||
return byte_count, detect_error_in_output(stripped)
|
errored = detect_error_in_output(stripped)
|
||||||
|
prompts = tuple(extract_prompt_lines(
|
||||||
|
stripped, base_ts=last_ts, max_chars=PROMPT_LINE_MAX_CHARS,
|
||||||
|
))
|
||||||
|
return byte_count, errored, prompts
|
||||||
|
|
||||||
|
|
||||||
def build_session_context(
|
def build_session_context(
|
||||||
@@ -349,7 +375,7 @@ def build_session_context(
|
|||||||
typing_bursts = _split_typing_bursts(iats)
|
typing_bursts = _split_typing_bursts(iats)
|
||||||
backspace_count, backspace_iats, kill_line_count = _scan_correction_signals(inputs)
|
backspace_count, backspace_iats, kill_line_count = _scan_correction_signals(inputs)
|
||||||
commands = _segment_commands(inputs)
|
commands = _segment_commands(inputs)
|
||||||
commands = _annotate_commands_with_output(commands, outputs)
|
commands, prompt_lines = _annotate_commands_with_output(commands, outputs)
|
||||||
inter_cmd_iats = tuple(
|
inter_cmd_iats = tuple(
|
||||||
max(0.0, commands[i + 1].start_ts - commands[i].end_ts)
|
max(0.0, commands[i + 1].start_ts - commands[i].end_ts)
|
||||||
for i in range(len(commands) - 1)
|
for i in range(len(commands) - 1)
|
||||||
@@ -380,4 +406,5 @@ def build_session_context(
|
|||||||
backspace_iats=backspace_iats,
|
backspace_iats=backspace_iats,
|
||||||
kill_line_count=kill_line_count,
|
kill_line_count=kill_line_count,
|
||||||
intra_command_iats=intra_command_iats,
|
intra_command_iats=intra_command_iats,
|
||||||
|
prompt_lines=prompt_lines,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -74,6 +74,24 @@ class PasteBurst:
|
|||||||
event_count: int
|
event_count: int
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True, slots=True)
|
||||||
|
class PromptLine:
|
||||||
|
"""One PS1 prompt line detected in the output stream.
|
||||||
|
|
||||||
|
PII trade-off (ANTI-authorised at Phase F): ``raw_line`` retains
|
||||||
|
the ANSI-stripped text of the prompt — hostnames / usernames /
|
||||||
|
cwd / etc. — because F.1 / F.3 / E.4 read off it. Capped at
|
||||||
|
``PROMPT_LINE_MAX_CHARS``. PromptLine instances live on
|
||||||
|
``SessionContext.prompt_lines``; only derived primitive values
|
||||||
|
(``bash`` / ``en-US`` / ``present``) leave the engine.
|
||||||
|
"""
|
||||||
|
|
||||||
|
ts: float
|
||||||
|
suffix_char: str # one of $ # % >
|
||||||
|
raw_line: str # ANSI stripped, capped at PROMPT_LINE_MAX_CHARS
|
||||||
|
is_root: bool # suffix_char == '#'
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True, slots=True)
|
@dataclass(frozen=True, slots=True)
|
||||||
class Command:
|
class Command:
|
||||||
"""One command-line invocation, segmented from the input stream.
|
"""One command-line invocation, segmented from the input stream.
|
||||||
@@ -115,6 +133,7 @@ class Command:
|
|||||||
pipe_count: int = 0
|
pipe_count: int = 0
|
||||||
errored: bool = False
|
errored: bool = False
|
||||||
output_bytes: int = 0
|
output_bytes: int = 0
|
||||||
|
followed_by_prompt: bool = False
|
||||||
|
|
||||||
|
|
||||||
def hash_token(token: str) -> str:
|
def hash_token(token: str) -> str:
|
||||||
@@ -122,6 +141,73 @@ def hash_token(token: str) -> str:
|
|||||||
return hashlib.sha256(token.encode("utf-8")).hexdigest()
|
return hashlib.sha256(token.encode("utf-8")).hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
# Prompt-line detection (Step F.0). A prompt line ends with one of
|
||||||
|
# $/#/%/> followed by a space or end-of-line. The trailing space /
|
||||||
|
# newline is what tells us this is a *prompt* not just a sentence
|
||||||
|
# ending in those characters. We require either the space variant or
|
||||||
|
# the EOL variant to be present right after the suffix.
|
||||||
|
_PROMPT_LINE_RE = re.compile(
|
||||||
|
r"""
|
||||||
|
(?:^|\n) # line start
|
||||||
|
(?P<line> # capture the prompt line itself
|
||||||
|
[^\n]*? # any line content (non-greedy)
|
||||||
|
(?P<suffix>[$\#%>]) # prompt suffix
|
||||||
|
\ ? # optional trailing space (PS1 default has it)
|
||||||
|
)
|
||||||
|
(?=\n|\Z) # at end of line / end of buffer
|
||||||
|
""",
|
||||||
|
re.VERBOSE,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_prompt_suffix(line: str) -> str | None:
|
||||||
|
"""Return the suffix character if ``line`` looks like a PS1 prompt.
|
||||||
|
|
||||||
|
``line`` is one logical output line, ANSI-stripped, trailing
|
||||||
|
whitespace included. The discriminating shape: any text ending in
|
||||||
|
one of ``$ # % >`` optionally followed by a single space. We require
|
||||||
|
the line to be non-empty and the suffix to be the rightmost
|
||||||
|
non-whitespace character.
|
||||||
|
"""
|
||||||
|
stripped = line.rstrip()
|
||||||
|
if not stripped:
|
||||||
|
return None
|
||||||
|
last = stripped[-1]
|
||||||
|
return last if last in ("$", "#", "%", ">") else None
|
||||||
|
|
||||||
|
|
||||||
|
def extract_prompt_lines(
    text: str,
    *,
    base_ts: float,
    max_chars: int,
) -> Iterator[PromptLine]:
    """Yield every prompt line detected in ``text`` (already ANSI-stripped).

    ``text`` is split on ``\\n`` and each resulting line is classified
    independently by ``_detect_prompt_suffix``; every matching line
    yields one ``PromptLine``, in order of appearance. Output containing
    several prompts (e.g. mid-stream redraws) therefore yields several
    items.

    Args:
        text: ANSI-stripped output text; the caller is responsible for
            slicing output by event window before calling.
        base_ts: Timestamp stamped on every emitted ``PromptLine``.
        max_chars: Upper bound on ``len(raw_line)``. Values below zero
            are treated as zero.

    Yields:
        ``PromptLine`` instances whose ``raw_line`` has leading/trailing
        whitespace removed (internal layout preserved). Over-long lines
        keep their *tail*, so the retained text still ends with the
        suffix character.
    """
    if not text:
        return
    # Clamp so a zero / negative cap cannot defeat the bound: with a
    # plain ``line[-max_chars:]``, ``max_chars == 0`` slices ``[-0:]``,
    # which is the whole line.
    cap = max(max_chars, 0)
    for raw in text.split("\n"):
        suffix = _detect_prompt_suffix(raw)
        if suffix is None:
            continue
        line = raw.strip()
        if len(line) > cap:
            # Explicit start index keeps the last ``cap`` characters and
            # yields "" (not the full line) when ``cap`` is 0.
            line = line[len(line) - cap:]
        yield PromptLine(
            ts=base_ts,
            suffix_char=suffix,
            raw_line=line,
            is_root=(suffix == "#"),
        )
|
||||||
|
|
||||||
|
|
||||||
def parse_shard_line(line: str) -> AsciinemaEvent | None:
|
def parse_shard_line(line: str) -> AsciinemaEvent | None:
|
||||||
"""Turn one shard JSONL line into an :data:`AsciinemaEvent`.
|
"""Turn one shard JSONL line into an :data:`AsciinemaEvent`.
|
||||||
|
|
||||||
|
|||||||
@@ -218,6 +218,16 @@ LANDING_RITUAL_FIRST_N: int = 5
|
|||||||
LANDING_RITUAL_HIT_MIN: int = 2
|
LANDING_RITUAL_HIT_MIN: int = 2
|
||||||
LANDING_RITUAL_MIN_COMMANDS: int = 3
|
LANDING_RITUAL_MIN_COMMANDS: int = 3
|
||||||
|
|
||||||
|
# ── F.0 prompt-line detector ──────────────────────────────────────────────
# Characters that may terminate a prompt line in the output stream (the
# suffix is followed by a space or EOL): ``$`` and ``#`` for sh/bash,
# ``%`` for zsh, ``>`` for fish / cmd.exe / powershell (told apart by
# line content at F.1 time).
PROMPT_SUFFIX_CHARS: frozenset[str] = frozenset("$#%>")
# Cap on the retained text of one prompt line, to bound memory. ANTI
# authorised keeping PS1 text on ctx (PII relaxation); the cap keeps a
# prompt inflated by a malicious operator bounded.
PROMPT_LINE_MAX_CHARS: int = 256
|
||||||
|
|
||||||
# ── motor.keystroke_cadence (Step B.1) ──────────────────────────────────────
|
# ── motor.keystroke_cadence (Step B.1) ──────────────────────────────────────
|
||||||
# Typing bursts split at gaps > IKI_THINK_MAX_S so think-pauses between
|
# Typing bursts split at gaps > IKI_THINK_MAX_S so think-pauses between
|
||||||
# commands don't inflate the within-burst CV. Mirrors the prototype's
|
# commands don't inflate the within-burst CV. Mirrors the prototype's
|
||||||
|
|||||||
142
tests/profiler/behave_shell/test_prompt_line_detection.py
Normal file
142
tests/profiler/behave_shell/test_prompt_line_detection.py
Normal file
@@ -0,0 +1,142 @@
|
|||||||
|
"""Step F.0: prompt-line detector.
|
||||||
|
|
||||||
|
The detector is shared infrastructure (no primitive emit). These tests
pin ``PromptLine`` semantics via ``extract_prompt_lines`` and
``SessionContext.prompt_lines`` / ``Command.followed_by_prompt`` via
``build_session_context``. F.1 / F.3 / E.4 all depend on these fields,
so any drift here breaks every downstream primitive built on them.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from decnet.profiler.behave_shell import extract_session
|
||||||
|
from decnet.profiler.behave_shell._ctx import build_session_context
|
||||||
|
from decnet.profiler.behave_shell._parse import (
|
||||||
|
AsciinemaEvent,
|
||||||
|
PromptLine,
|
||||||
|
extract_prompt_lines,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _typed(text: str, t0: float = 0.0, dt: float = 0.05) -> list[AsciinemaEvent]:
|
||||||
|
return [(t0 + i * dt, "i", c) for i, c in enumerate(text)]
|
||||||
|
|
||||||
|
|
||||||
|
# ── extract_prompt_lines ────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_bash_prompt_detected() -> None:
    """A ``user@host:~$ `` prompt yields one non-root ``$`` prompt line."""
    found = list(extract_prompt_lines(
        "anti@host:~$ ", base_ts=1.0, max_chars=256,
    ))
    assert len(found) == 1
    prompt = found[0]
    assert prompt.suffix_char == "$"
    assert prompt.is_root is False
    assert "anti@host" in prompt.raw_line
|
||||||
|
|
||||||
|
|
||||||
|
def test_root_prompt_detected_as_root() -> None:
    """A prompt ending in ``#`` is reported with ``is_root`` set."""
    found = list(extract_prompt_lines(
        "root@host:/etc# ", base_ts=2.0, max_chars=256,
    ))
    assert len(found) == 1
    prompt = found[0]
    assert prompt.suffix_char == "#"
    assert prompt.is_root is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_zsh_prompt_detected() -> None:
    """A zsh-style ``host% `` prompt is detected with suffix ``%``."""
    found = list(extract_prompt_lines(
        "host% ", base_ts=3.0, max_chars=256,
    ))
    assert len(found) == 1
    assert found[0].suffix_char == "%"
|
||||||
|
|
||||||
|
|
||||||
|
def test_powershell_prompt_detected() -> None:
    """A ``PS C:\\...> `` prompt is detected with suffix ``>``."""
    found = list(extract_prompt_lines(
        "PS C:\\Users\\anti> ", base_ts=4.0, max_chars=256,
    ))
    assert len(found) == 1
    prompt = found[0]
    assert prompt.suffix_char == ">"
    assert "PS " in prompt.raw_line
|
||||||
|
|
||||||
|
|
||||||
|
def test_clean_output_no_prompt() -> None:
    """Plain output lines without a prompt suffix yield nothing."""
    plain_output = "file1\nfile2\nfile3\n"
    found = list(extract_prompt_lines(plain_output, base_ts=5.0, max_chars=256))
    assert found == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_long_prompt_capped_to_max_chars() -> None:
    """An over-long prompt is capped to ``max_chars``, keeping its tail.

    The cap keeps the *end* of the line, so the retained text must be
    exactly ``max_chars`` long and still end with the suffix character.
    """
    long = "x" * 500 + "$ "
    lines = list(extract_prompt_lines(long, base_ts=6.0, max_chars=256))
    assert len(lines) == 1
    assert lines[0].suffix_char == "$"
    # 255 filler characters plus the trailing "$" — pins both the bound
    # and which end of the line survives the cap.
    assert lines[0].raw_line == "x" * 255 + "$"
|
||||||
|
|
||||||
|
|
||||||
|
def test_multi_line_output_with_trailing_prompt() -> None:
    """Mid-stream output then trailing prompt → one prompt detected."""
    listing_then_prompt = "\n".join([
        "total 12",
        "drwxr-xr-x user 4096 May 4 .",
        "anti@host:~$ ",
    ])
    found = list(extract_prompt_lines(
        listing_then_prompt, base_ts=7.0, max_chars=256,
    ))
    assert len(found) == 1
    assert found[0].suffix_char == "$"
|
||||||
|
|
||||||
|
|
||||||
|
def test_ansi_wrapped_prompt_detected_after_strip() -> None:
    """ANSI-coloured prompt → still detected (strip happens inside _output_window)."""
    coloured_prompt = "\x1b[1;32manti@host\x1b[0m:\x1b[34m~\x1b[0m$ "
    events: list[AsciinemaEvent] = [
        *_typed("ls\r", t0=0.0),
        (0.20, "o", "file1\n"),
        (0.30, "o", coloured_prompt),
    ]
    ctx = build_session_context(events, sid="prompt-ansi", source="test")
    detected = ctx.prompt_lines
    assert len(detected) == 1
    assert detected[0].suffix_char == "$"
|
||||||
|
|
||||||
|
|
||||||
|
# ── SessionContext.prompt_lines + Command.followed_by_prompt ────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_output_no_prompts() -> None:
    """A command with no output events has no prompts and no prompt flag."""
    ctx = build_session_context(
        _typed("ls\r", t0=0.0), sid="prompt-empty", source="test",
    )
    assert ctx.prompt_lines == ()
    first_command = ctx.commands[0]
    assert first_command.followed_by_prompt is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_command_followed_by_prompt_marks_field() -> None:
    """Output ending in a prompt sets ``followed_by_prompt`` on the command."""
    keystrokes = _typed("ls\r", t0=0.0)
    events: list[AsciinemaEvent] = [
        *keystrokes,
        (0.20, "o", "file1\nanti@host:~$ "),
    ]
    ctx = build_session_context(events, sid="prompt-followed", source="test")
    assert ctx.commands[0].followed_by_prompt is True
    assert len(ctx.prompt_lines) == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_last_command_no_trailing_prompt() -> None:
    """Two commands, only the first has a trailing prompt."""
    first_command: list[AsciinemaEvent] = [
        *_typed("ls\r", t0=0.0),
        (0.20, "o", "file1\nanti@host:~$ "),
    ]
    second_command: list[AsciinemaEvent] = [
        *_typed("foo\r", t0=1.0),
        (1.20, "o", "bash: foo: command not found\n"),
    ]
    events: list[AsciinemaEvent] = first_command + second_command
    ctx = build_session_context(events, sid="prompt-mid", source="test")
    assert len(ctx.commands) == 2
    assert ctx.commands[0].followed_by_prompt is True
    assert ctx.commands[1].followed_by_prompt is False
|
||||||
|
|
||||||
|
|
||||||
|
# ── PII regression ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_pii_prompt_text_does_not_leak_to_observations() -> None:
    """PromptLine.raw_line lives on ctx, never in observation JSON."""
    events: list[AsciinemaEvent] = [
        *_typed("ls\r", t0=0.0),
        (0.20, "o", "file1\nsecret-host-name@internal:~$ "),
    ]
    observations = list(extract_session(events, sid="prompt-pii"))
    for observation in observations:
        serialized = observation.model_dump_json()
        assert "secret-host-name" not in serialized
|
||||||
Reference in New Issue
Block a user