feat(profiler/behave_shell): emit environmental.keyboard_layout
ANTI authorised dropping the PII boundary for this primitive.

ctx gains typed_unigram_counts / typed_bigram_counts / typed_letter_count, populated during the existing single-pass input walk (paste-class events excluded).

Two-axis classifier:
* layout-artefact unigrams take priority — q rate above floor with low English saturation → azerty; z above floor with y below → qwertz
* fallback to English-bigram saturation: ≥ floor → qwerty, else other

Sample-size floor: 200 typed letters; bigram histogram capped at top-64 to bound memory. Confidence cap stays moderate (0.40-0.55) — heuristic discriminator.
This commit is contained in:
@@ -25,6 +25,7 @@ from decnet.profiler.behave_shell._features.cognitive import (
|
||||
inter_command_latency_class,
|
||||
)
|
||||
from decnet.profiler.behave_shell._features.environmental import (
|
||||
keyboard_layout,
|
||||
locale,
|
||||
shell_type,
|
||||
terminal_multiplexer,
|
||||
@@ -75,4 +76,5 @@ FEATURES: tuple[FeatureFn, ...] = (
|
||||
shell_type,
|
||||
terminal_multiplexer,
|
||||
locale,
|
||||
keyboard_layout,
|
||||
)
|
||||
|
||||
@@ -8,6 +8,7 @@ which F.1 / F.3 / E.4 read.
|
||||
Step F.1: ``environmental.shell_type``.
|
||||
Step F.2: ``environmental.terminal_multiplexer``.
|
||||
Step F.3: ``environmental.locale``.
|
||||
Step F.4: ``environmental.keyboard_layout``.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -21,6 +22,13 @@ from decnet.profiler.behave_shell._ctx import SessionContext
|
||||
from decnet.profiler.behave_shell._features._emit import make_observation
|
||||
from decnet.profiler.behave_shell._parse import PromptLine, strip_ansi
|
||||
from decnet.profiler.behave_shell._thresholds import (
|
||||
LAYOUT_AZERTY_ENG_MAX,
|
||||
LAYOUT_AZERTY_Q_MIN,
|
||||
LAYOUT_MIN_TYPED_LETTERS,
|
||||
LAYOUT_QWERTY_ENG_MIN,
|
||||
LAYOUT_QWERTZ_Y_MAX,
|
||||
LAYOUT_QWERTZ_Z_MIN,
|
||||
LAYOUT_TOP_ENG_BIGRAMS,
|
||||
LOCALE_MIN_VALUE_LENGTH,
|
||||
SHELL_TYPE_MIN_PROMPTS,
|
||||
)
|
||||
@@ -228,3 +236,64 @@ def locale(ctx: SessionContext) -> Iterator[Observation]:
|
||||
value=best_value,
|
||||
confidence=0.80,
|
||||
)
|
||||
|
||||
|
||||
def keyboard_layout(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``environmental.keyboard_layout``.

    Classifies the session's keyboard layout from the typed-only
    character histograms carried on ``ctx``. Two signals are combined:

    * **English-bigram saturation** — the share of all counted bigrams
      that fall in ``LAYOUT_TOP_ENG_BIGRAMS``.
    * **Layout-artefact unigram rates** — the per-letter rates of
      ``q``, ``z`` and ``y`` over the typed letter count.

    Decision order (first match wins):

    1. ``q`` rate above ``LAYOUT_AZERTY_Q_MIN`` AND saturation below
       ``LAYOUT_AZERTY_ENG_MAX`` → ``azerty`` (AZERTY's `a` sits on
       QWERTY's `q` position, so mistypes bleed `q`).
    2. ``z`` rate above ``LAYOUT_QWERTZ_Z_MIN`` AND ``y`` rate below
       ``LAYOUT_QWERTZ_Y_MAX`` → ``qwertz`` (QWERTZ swaps `y`/`z`).
    3. Saturation at or above ``LAYOUT_QWERTY_ENG_MIN`` → ``qwerty``.
    4. Otherwise → ``other``.

    The artefact checks deliberately run before the QWERTY check:
    AZERTY/QWERTZ operators may still hit some English bigrams.

    Nothing is emitted when ``ctx.typed_letter_count`` is below
    ``LAYOUT_MIN_TYPED_LETTERS`` — the histograms are too thin to
    discriminate honestly. Confidence is 0.40 for fewer than 500 typed
    letters and 0.55 otherwise.
    """
    letter_total = ctx.typed_letter_count
    if letter_total < LAYOUT_MIN_TYPED_LETTERS:
        return

    unigrams = ctx.typed_unigram_counts
    bigrams = ctx.typed_bigram_counts

    # Share of counted bigrams that are common English bigrams; an
    # empty bigram histogram counts as zero saturation.
    bigram_total = sum(bigrams.values())
    if bigram_total > 0:
        english_hits = sum(
            bigrams.get(pair, 0) for pair in LAYOUT_TOP_ENG_BIGRAMS
        )
        english_share = english_hits / bigram_total
    else:
        english_share = 0.0

    # Per-letter rates for the three layout-artefact letters.
    q_share, z_share, y_share = (
        unigrams.get(letter, 0) / letter_total for letter in ("q", "z", "y")
    )

    looks_azerty = (
        q_share > LAYOUT_AZERTY_Q_MIN
        and english_share < LAYOUT_AZERTY_ENG_MAX
    )
    looks_qwertz = (
        z_share > LAYOUT_QWERTZ_Z_MIN
        and y_share < LAYOUT_QWERTZ_Y_MAX
    )

    # Artefact verdicts take priority over the English-saturation fallback.
    if looks_azerty:
        layout = "azerty"
    elif looks_qwertz:
        layout = "qwertz"
    elif english_share >= LAYOUT_QWERTY_ENG_MIN:
        layout = "qwerty"
    else:
        layout = "other"

    # Heuristic discriminator: confidence stays moderate, stepping up
    # only once the sample reaches 500 typed letters.
    certainty = 0.55 if letter_total >= 500 else 0.40

    yield make_observation(
        ctx,
        primitive="environmental.keyboard_layout",
        value=layout,
        confidence=certainty,
    )
|
||||
|
||||
Reference in New Issue
Block a user