fix(profiler/behave_shell): tolerate non-UTF-8 bytes in shard reads
Real-world bug surfaced on the first live decky run: sessrec.c's json_escape (decnet/templates/_shared/sessrec/sessrec.c:111-141) only escapes bytes < 0x20 + DEL — bytes >= 0x80 pass through raw. An attacker pasting Latin-1 / GB18030 / any non-UTF-8 8-bit text yields a shard line that chokes Python's default UTF-8 text-mode read with 'utf-8 codec can't decode byte 0xac'. Three changes: 1. _events_for_sid now opens with errors='surrogateescape', preserving byte fidelity through the JSON parse. Surrogate-half chars correctly fail isascii() / isalpha() so the typed-letter histograms filter them out automatically. Tightening sessrec.c to escape >= 0x80 is filed for v0.2 — that's the proper forensic-data fix; the surrogateescape read makes the engine robust meanwhile. 2. Regression test (test_handler_tolerates_non_utf8_bytes_in_shard) builds a shard with raw 0xAC bytes inside a JSON 'data' string and asserts the handler still persists observations. 3. Collector's _emit_session now logs at WARNING (was DEBUG) when find_shard_with_sid returns None, citing the three usual causes (ARTIFACTS_ROOT perms, _SERVICE_RE whitelist, sessrec/collector race). Surfaces the silent-skip class of bug in seconds instead of hours — the first live run hid a perm mismatch (User=anti without SupplementaryGroups=decnet) for an entire session window before the symptom was traced upstream.
This commit is contained in:
@@ -313,16 +313,34 @@ class _SessionAggregator:
|
|||||||
# consumer skips honestly. Additive field; existing TTP consumers
|
# consumer skips honestly. Additive field; existing TTP consumers
|
||||||
# ignore it.
|
# ignore it.
|
||||||
shard_path: str | None = None
|
shard_path: str | None = None
|
||||||
|
resolve_error: str | None = None
|
||||||
if sid and decky and service:
|
if sid and decky and service:
|
||||||
try:
|
try:
|
||||||
resolved = find_shard_with_sid(decky, service, sid)
|
resolved = find_shard_with_sid(decky, service, sid)
|
||||||
except (ValueError, OSError, PermissionError) as exc:
|
except (ValueError, OSError, PermissionError) as exc:
|
||||||
logger.debug(
|
resolve_error = f"{type(exc).__name__}: {exc}"
|
||||||
"collector: shard resolve failed for sid=%s: %s", sid, exc,
|
|
||||||
)
|
|
||||||
resolved = None
|
resolved = None
|
||||||
if resolved is not None:
|
if resolved is not None:
|
||||||
shard_path = str(resolved)
|
shard_path = str(resolved)
|
||||||
|
if shard_path is None and sid:
|
||||||
|
# Loud-by-default — the BEHAVE-SHELL handler will skip
|
||||||
|
# session.ended events with shard_path=None, so a silent
|
||||||
|
# miss here means the profiler panel never hydrates. Surface
|
||||||
|
# the most common failure modes inline so the operator can
|
||||||
|
# diagnose without grepping decnet/artifacts/shards.py.
|
||||||
|
#
|
||||||
|
# 1. ARTIFACTS_ROOT not readable by the collector's user
|
||||||
|
# (perm 0750 decnet:decnet vs. User=anti without
|
||||||
|
# SupplementaryGroups=decnet).
|
||||||
|
# 2. service whitelist (_SERVICE_RE accepts ssh|telnet only).
|
||||||
|
# 3. sessrec hasn't flushed the shard for this sid yet
|
||||||
|
# (collector tick won the race; next tick recovers).
|
||||||
|
logger.warning(
|
||||||
|
"collector: shard_path=None decky=%s service=%s sid=%s "
|
||||||
|
"(error=%s) — profiler will skip this session.ended; "
|
||||||
|
"check ARTIFACTS_ROOT perms / service whitelist",
|
||||||
|
decky, service, sid, resolve_error or "shard not found",
|
||||||
|
)
|
||||||
|
|
||||||
payload: dict[str, Any] = {
|
payload: dict[str, Any] = {
|
||||||
"session_id": sid or None,
|
"session_id": sid or None,
|
||||||
|
|||||||
@@ -53,9 +53,17 @@ def _events_for_sid(shard: Path, sid: str) -> list[AsciinemaEvent]:
|
|||||||
Mirrors the loader pattern in
|
Mirrors the loader pattern in
|
||||||
``tests/profiler/behave_shell/test_calibration_grid.py``: skip
|
``tests/profiler/behave_shell/test_calibration_grid.py``: skip
|
||||||
headers / non-matching sids / unparseable lines silently.
|
headers / non-matching sids / unparseable lines silently.
|
||||||
|
|
||||||
|
``errors="surrogateescape"`` because sessrec.c's json_escape only
|
||||||
|
escapes bytes < 0x20 + DEL — bytes >= 0x80 pass through raw, so
|
||||||
|
a real shard with Latin-1 / GB18030 / arbitrary 8-bit attacker
|
||||||
|
paste content is NOT valid UTF-8. surrogateescape preserves byte
|
||||||
|
fidelity through the JSON read; downstream isalpha() / isascii()
|
||||||
|
correctly filter the surrogate-half chars out of the typed-letter
|
||||||
|
histograms. Filed for v0.2: tighten sessrec.c to escape >= 0x80.
|
||||||
"""
|
"""
|
||||||
events: list[AsciinemaEvent] = []
|
events: list[AsciinemaEvent] = []
|
||||||
with shard.open() as f:
|
with shard.open(encoding="utf-8", errors="surrogateescape") as f:
|
||||||
for line in f:
|
for line in f:
|
||||||
try:
|
try:
|
||||||
rec = json.loads(line)
|
rec = json.loads(line)
|
||||||
|
|||||||
@@ -10,8 +10,6 @@ import json
|
|||||||
from typing import Any
|
from typing import Any
|
||||||
from unittest.mock import AsyncMock
|
from unittest.mock import AsyncMock
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from decnet.profiler.behave_shell._handler import (
|
from decnet.profiler.behave_shell._handler import (
|
||||||
_build_evidence_ref,
|
_build_evidence_ref,
|
||||||
handle_session_ended,
|
handle_session_ended,
|
||||||
@@ -180,6 +178,42 @@ async def test_publish_none_is_silent(tmp_path) -> None:
|
|||||||
assert n > 0
|
assert n > 0
|
||||||
|
|
||||||
|
|
||||||
|
async def test_handler_tolerates_non_utf8_bytes_in_shard(tmp_path) -> None:
|
||||||
|
"""Real-world regression: sessrec.c's json_escape only escapes
|
||||||
|
bytes < 0x20 + DEL, so bytes >= 0x80 pass through raw. An attacker
|
||||||
|
pasting Latin-1 (or any non-UTF-8) text yields a shard line that
|
||||||
|
chokes Python's default UTF-8 text-mode read. The handler must
|
||||||
|
open with errors='surrogateescape' and process the session anyway.
|
||||||
|
"""
|
||||||
|
sid = _SID
|
||||||
|
shard_dir = tmp_path / _DECKY / _SERVICE / "transcripts"
|
||||||
|
shard_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
shard = shard_dir / "sessions-2026-05-09.jsonl"
|
||||||
|
# Header (valid JSON).
|
||||||
|
header = b'{"sid":"' + sid.encode() + b'","hdr":{"version":2,"width":80,"height":24,"timestamp":1}}\n'
|
||||||
|
# Synthetic typing burst — six commands so the calibration floor fires.
|
||||||
|
body_lines: list[bytes] = []
|
||||||
|
text = "ls\rps\rid\rwhoami\rpwd\runame\r"
|
||||||
|
for i, c in enumerate(text):
|
||||||
|
body_lines.append(
|
||||||
|
f'{{"sid":"{sid}","t":{i * 0.05},"ch":"i","d":"{c}"}}\n'.encode()
|
||||||
|
)
|
||||||
|
# ONE output event with a raw 0xAC byte inside the data string —
|
||||||
|
# exactly what sessrec emits for Latin-1 content.
|
||||||
|
body_lines.append(
|
||||||
|
b'{"sid":"' + sid.encode()
|
||||||
|
+ b'","t":5.0,"ch":"o","d":"prefix\xac\xac\xacsuffix"}\n'
|
||||||
|
)
|
||||||
|
with shard.open("wb") as f:
|
||||||
|
f.write(header)
|
||||||
|
for line in body_lines:
|
||||||
|
f.write(line)
|
||||||
|
|
||||||
|
repo = _make_repo()
|
||||||
|
n = await handle_session_ended(repo, _payload(str(shard)), None)
|
||||||
|
assert n > 0, "handler should persist at least one observation despite raw bytes"
|
||||||
|
|
||||||
|
|
||||||
async def test_attacker_uuid_in_payload_for_filter(tmp_path) -> None:
|
async def test_attacker_uuid_in_payload_for_filter(tmp_path) -> None:
|
||||||
"""Phase 5 amendment: every published observation carries the
|
"""Phase 5 amendment: every published observation carries the
|
||||||
DECNET-side ``attacker_uuid`` denorm (NOT the BEHAVE
|
DECNET-side ``attacker_uuid`` denorm (NOT the BEHAVE
|
||||||
|
|||||||
Reference in New Issue
Block a user