fix(behave_shell/F.0): tighten prompt detector — log lines ending in '>' no longer vote

_detect_prompt_suffix accepted ANY line whose last non-whitespace
character was one of `$`, `#`, `%` or `>` as a PS1 prompt, so a single
`cat /var/log/dpkg.log` (195 lines closing in `<none>`) flooded
environmental.shell_type votes and flipped a plainly-bash session to
fish.

A prompt line now requires either a trailing space after the suffix
(default PS1 shape across bash/zsh/fish/PowerShell) or a PS1-shape
token (user@host, "PS " prefix, or a Windows drive-letter prefix).

Regression tests pin the dpkg.log false-positive and a $-terminated
prose line.
This commit is contained in:
2026-05-09 02:57:40 -04:00
parent 0c1fc68b13
commit 65ddaaa681
2 changed files with 61 additions and 5 deletions

View File

@@ -160,20 +160,50 @@ _PROMPT_LINE_RE = re.compile(
)
_PS1_SHAPE_RE = re.compile(
# A line that LOOKS like a PS1 prompt carries at least one of:
# - user@host (bash/zsh/fish defaults)
# - "PS " prefix (PowerShell)
# - drive-letter prefix "C:\" / "C:/" (cmd.exe / PowerShell)
# These tokens are extremely rare in the body of generic command
# output, so they discriminate prompts from log lines that
# incidentally end with one of $#%>.
r"(?:[\w.-]+@[\w.-]+|^\s*PS\s|[A-Za-z]:[\\/])"
)
def _detect_prompt_suffix(line: str) -> str | None:
"""Return the suffix character if ``line`` looks like a PS1 prompt.
``line`` is one logical output line, ANSI-stripped, trailing
whitespace included. The discriminating shape: any text ending in
one of ``$ # % >`` optionally followed by a single space. We require
the line to be non-empty and the suffix to be the rightmost
non-whitespace character.
whitespace included. The discriminating shape: a line ending in
one of ``$ # % >``, AND either
* the original line ends with the suffix followed by a trailing
space (the default PS1 shape across bash / zsh / fish /
PowerShell — ``$ ``, ``# ``, ``% ``, ``> ``), OR
* the line carries a recognisable PS1-shape token (``user@host``,
``PS `` prefix, or a Windows drive-letter prefix).
Without this guard, command output that incidentally ends in one
of the suffix characters — e.g. ``dpkg.log`` lines that close with
``<none>`` — was being voted into the shell-type mode and could
flip the result to ``fish`` for an obvious-bash session.
"""
stripped = line.rstrip()
if not stripped:
return None
last = stripped[-1]
return last if last in ("$", "#", "%", ">") else None
if last not in ("$", "#", "%", ">"):
return None
# Prefer the cheap structural check: was there a trailing space
# after the suffix in the original line? (Default PS1s all carry
# one.) Falls back to a PS1-shape token search.
if len(line) > len(stripped) and line[len(stripped)] == " ":
return last
if _PS1_SHAPE_RE.search(stripped):
return last
return None
def extract_prompt_lines(

View File

@@ -66,6 +66,32 @@ def test_clean_output_no_prompt() -> None:
assert lines == []
def test_log_lines_ending_in_gt_are_not_prompts() -> None:
    """Log output that merely ends in ``>`` must not count as a prompt.

    ``dpkg.log`` style lines close with ``<none>``. If they registered
    as fish prompts, a single ``cat /var/log/dpkg.log`` would flood
    ``shell_type`` votes and flip the mode for a plainly-bash session.
    Only the real prompt on the final line may be reported.
    """
    dpkg_output = (
        "2026-05-09 02:18:09 configure libssl3:amd64 3.0.19-1~deb12u2 <none>\n"
        "2026-05-09 02:18:09 configure libexpat1:amd64 2.5.0-1+deb12u2 <none>\n"
        "2026-05-09 02:18:10 configure python3.11-minimal:amd64 3.11.2-6 <none>\n"
    )
    shell_prompt = "root@host:~# "

    prompts = list(
        extract_prompt_lines(
            dpkg_output + shell_prompt, base_ts=10.0, max_chars=256
        )
    )

    # Exactly one hit, and it is the root prompt rather than a log line.
    assert len(prompts) == 1
    assert prompts[0].suffix_char == "#"
def test_output_line_ending_in_dollar_without_ps1_shape_rejected() -> None:
    """A ``$``-terminated output line with no prompt shape is rejected.

    The sample ends a line in ``$`` (e.g. a shell variable in a doc)
    with neither a trailing space nor a PS1 shape token, so nothing
    may be reported as a prompt.
    """
    prose = "use $PATH or $HOME\nset -- $\n"
    found = list(extract_prompt_lines(prose, base_ts=11.0, max_chars=256))
    assert not found
def test_long_prompt_capped_to_max_chars() -> None:
long = "x" * 500 + "$ "
lines = list(extract_prompt_lines(long, base_ts=6.0, max_chars=256))