From 65ddaaa6813dc51ca2a742bcf5db8e2fc4b2f7e1 Mon Sep 17 00:00:00 2001
From: anti
Date: Sat, 9 May 2026 02:57:40 -0400
Subject: [PATCH] =?UTF-8?q?fix(behave=5Fshell/F.0):=20tighten=20prompt=20d?=
 =?UTF-8?q?etector=20=E2=80=94=20log=20lines=20ending=20in=20'>'=20no=20lo?=
 =?UTF-8?q?nger=20vote?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

_detect_prompt_suffix accepted ANY line ending in $#%> as a PS1 prompt,
so a single `cat /var/log/dpkg.log` (195 lines closing in `<none>`)
flooded environmental.shell_type votes and flipped a plainly-bash
session to fish.

A prompt line now requires either a trailing space after the suffix
(default PS1 shape across bash/zsh/fish/PowerShell) or a PS1-shape
token (user@host, "PS " prefix, or a Windows drive-letter prefix).

PS1-shape tokens must start on a token boundary, so the "p:/" tail of
a URL scheme and an address wrapped in <...> (git log "Author:" lines,
mail headers) are not mistaken for a drive-letter or user@host token.

Regression tests pin the dpkg.log false-positive, a $-terminated
prose line, and '>'-terminated URL / <address> lines.
---
v2: anchor the user@host and drive-letter alternatives of _PS1_SHAPE_RE
to a token boundary and add a regression test. The commit id and the
post-image blob ids below are still those of v1.

 decnet/profiler/behave_shell/_parse.py        | 45 ++++++++++++++++---
 .../test_prompt_line_detection.py             | 38 ++++++++++++++++
 2 files changed, 78 insertions(+), 5 deletions(-)

diff --git a/decnet/profiler/behave_shell/_parse.py b/decnet/profiler/behave_shell/_parse.py
index 5e1b0d5d..cfa1901e 100644
--- a/decnet/profiler/behave_shell/_parse.py
+++ b/decnet/profiler/behave_shell/_parse.py
@@ -160,20 +160,55 @@ _PROMPT_LINE_RE = re.compile(
 )
 
 
+_PS1_SHAPE_RE = re.compile(
+    # A line that LOOKS like a PS1 prompt carries at least one of:
+    #   - user@host       (bash/zsh/fish defaults)
+    #   - "PS " prefix    (PowerShell)
+    #   - drive-letter prefix "C:\" / "C:/"  (cmd.exe / PowerShell)
+    # These tokens are extremely rare in the body of generic command
+    # output, so they discriminate prompts from log lines that
+    # incidentally end with one of $#%>.
+    #
+    # Both the user@host and the drive-letter alternatives must start
+    # on a token boundary: otherwise "p:/" inside "<http://host/>" or
+    # the address in "Author: name <user@host>" (git log, mail headers)
+    # would pass for a PS1 token on a line that ends in ">".
+    r"(?:(?<![<\w.-])[\w.-]+@[\w.-]+|^\s*PS\s|\b[A-Za-z]:[\\/])"
+)
+
+
 def _detect_prompt_suffix(line: str) -> str | None:
     """Return the suffix character if ``line`` looks like a PS1 prompt.
 
     ``line`` is one logical output line, ANSI-stripped, trailing
-    whitespace included.  The discriminating shape: any text ending in
-    one of ``$ # % >`` optionally followed by a single space. We require
-    the line to be non-empty and the suffix to be the rightmost
-    non-whitespace character.
+    whitespace included. The discriminating shape: a line ending in
+    one of ``$ # % >``, AND either
+
+    * the original line ends with the suffix followed by a trailing
+      space (the default PS1 shape across bash / zsh / fish /
+      PowerShell — ``$ ``, ``# ``, ``% ``, ``> ``), OR
+    * the line carries a recognisable PS1-shape token (``user@host``,
+      ``PS `` prefix, or a Windows drive-letter prefix).
+
+    Without this guard, command output that incidentally ends in one
+    of the suffix characters — e.g. ``dpkg.log`` lines that close with
+    ``<none>`` — was being voted into the shell-type mode and could
+    flip the result to ``fish`` for an obvious-bash session.
     """
     stripped = line.rstrip()
     if not stripped:
         return None
     last = stripped[-1]
-    return last if last in ("$", "#", "%", ">") else None
+    if last not in ("$", "#", "%", ">"):
+        return None
+    # Prefer the cheap structural check: was there a trailing space
+    # after the suffix in the original line? (Default PS1s all carry
+    # one.) Falls back to a PS1-shape token search.
+    if len(line) > len(stripped) and line[len(stripped)] == " ":
+        return last
+    if _PS1_SHAPE_RE.search(stripped):
+        return last
+    return None
 
 
 def extract_prompt_lines(
diff --git a/tests/profiler/behave_shell/test_prompt_line_detection.py b/tests/profiler/behave_shell/test_prompt_line_detection.py
index 5606e976..78d80869 100644
--- a/tests/profiler/behave_shell/test_prompt_line_detection.py
+++ b/tests/profiler/behave_shell/test_prompt_line_detection.py
@@ -66,6 +66,44 @@ def test_clean_output_no_prompt() -> None:
     assert lines == []
 
 
+def test_log_lines_ending_in_gt_are_not_prompts() -> None:
+    """``dpkg.log`` style lines close with ``<none>`` — incidentally
+    ending in ``>``.  They must NOT register as fish prompts; otherwise
+    a single ``cat /var/log/dpkg.log`` would flood ``shell_type`` votes
+    and flip the mode for a plainly-bash session.
+    """
+    text = (
+        "2026-05-09 02:18:09 configure libssl3:amd64 3.0.19-1~deb12u2 <none>\n"
+        "2026-05-09 02:18:09 configure libexpat1:amd64 2.5.0-1+deb12u2 <none>\n"
+        "2026-05-09 02:18:10 configure python3.11-minimal:amd64 3.11.2-6 <none>\n"
+        "root@host:~# "
+    )
+    lines = list(extract_prompt_lines(text, base_ts=10.0, max_chars=256))
+    assert len(lines) == 1
+    assert lines[0].suffix_char == "#"
+
+
+def test_output_line_ending_in_dollar_without_ps1_shape_rejected() -> None:
+    """Sentence that happens to end in ``$`` (e.g. shell variable in
+    a doc) without trailing space and without a PS1 shape token must
+    not be treated as a prompt."""
+    text = "use $PATH or $HOME\nset -- $\n"
+    lines = list(extract_prompt_lines(text, base_ts=11.0, max_chars=256))
+    assert lines == []
+
+
+def test_urls_and_bracketed_addresses_are_not_ps1_tokens() -> None:
+    """A URL scheme tail (``p:/`` in ``https://``) is not a drive
+    letter, and an address wrapped in ``<...>`` is not a ``user@host``
+    prompt token; such ``>``-terminated output must not vote."""
+    text = (
+        "Author: anti <anti@example.com>\n"
+        "see <https://example.com/docs>\n"
+    )
+    lines = list(extract_prompt_lines(text, base_ts=12.0, max_chars=256))
+    assert lines == []
+
+
 def test_long_prompt_capped_to_max_chars() -> None:
     long = "x" * 500 + "$ "
     lines = list(extract_prompt_lines(long, base_ts=6.0, max_chars=256))