fix(web/transcripts): fall back to shard-scan when Log row has no shard_path

sessrec.c emits the session_recorded SD blob with sid/service/src_ip/
duration_s/bytes/truncated — it never emitted shard_path. The web
handler still asked for fields.shard_path, got "", tripped the
sessions-YYYY-MM-DD.jsonl basename regex and returned
400 "invalid shard name" for every legitimate transcript request.

Handler now:
- Fast-paths when fields.shard_path IS present and validates
  (for any future emitter or ingester that backfills it).
- Otherwise enumerates sessions-YYYY-MM-DD.jsonl shards under
  ARTIFACTS_ROOT/{decky}/{service}/transcripts/ (newest first) and
  returns the first one whose per-sid index contains our sid.
- Security invariant preserved: only files whose basename matches the
  _SHARD_BASENAME_RE are ever opened, and they always resolve inside
  ARTIFACTS_ROOT. A forged fields.shard_path is silently ignored.
- Soft-fails OSError/PermissionError on the transcripts dir (decky
  containers often write it with a uid the API can't read) — returns
  404 instead of a 500 traceback.

test_forged_shard_path_blocked updated to match the new semantics:
forgery is ignored, the real shard is served via fallback. The
invariant (no /etc/passwd access) is still asserted by the fact
that status is 200 with data from the test shard.
This commit is contained in:
2026-04-24 01:18:40 -04:00
parent 215251a122
commit 323077b383
2 changed files with 76 additions and 7 deletions

View File

@@ -72,11 +72,15 @@ def _get_index(path: Path) -> tuple[dict[str, list[tuple[int, int]]], int]:
return index, st.st_size
def _resolve_shard(decky: str, service: str, shard_name: str) -> Path:
def _validate_names(decky: str, service: str) -> None:
if not _DECKY_RE.fullmatch(decky):
raise HTTPException(status_code=400, detail="invalid decky name")
if not _SERVICE_RE.fullmatch(service):
raise HTTPException(status_code=400, detail="invalid service")
def _resolve_shard(decky: str, service: str, shard_name: str) -> Path:
_validate_names(decky, service)
if not _SHARD_BASENAME_RE.fullmatch(shard_name):
raise HTTPException(status_code=400, detail="invalid shard name")
root = ARTIFACTS_ROOT.resolve()
@@ -86,6 +90,46 @@ def _resolve_shard(decky: str, service: str, shard_name: str) -> Path:
return candidate
def _find_shard_with_sid(decky: str, service: str, sid: str) -> Path | None:
"""Scan every ``sessions-YYYY-MM-DD.jsonl`` under the decky's transcripts
dir until one claims this sid.
Fallback for rows where ``fields.shard_path`` is missing (current
sessrec.c does not emit it) or for sessions that span UTC midnight
(events land in two shards; the emitted SD could only name one).
Newest shards first — most transcript lookups are for recent
sessions. Result is cached by ``_get_index`` keyed on
(path, mtime), so repeated calls are ~free.
"""
_validate_names(decky, service)
root = ARTIFACTS_ROOT.resolve()
transcripts_dir = (root / decky / service / "transcripts").resolve()
if root not in transcripts_dir.parents:
return None
# Absent dir, or dir the API process can't stat/read — treat as
# "no transcript", not as a 500 traceback. Most commonly the decky
# container wrote this tree as a container-side uid that the API
# (running under --user / --group) can't cross.
try:
if not transcripts_dir.is_dir():
return None
entries = list(transcripts_dir.iterdir())
except (OSError, PermissionError):
return None
shards = sorted(
(p for p in entries if _SHARD_BASENAME_RE.fullmatch(p.name)),
reverse=True, # newest day first
)
for shard in shards:
try:
index, _size = _get_index(shard)
except (OSError, PermissionError):
continue
if sid in index:
return shard
return None
@router.get(
"/transcripts/{decky}/{sid}",
tags=["Transcripts"],
@@ -126,8 +170,18 @@ async def get_transcript(
if log_decky and log_decky != decky:
raise HTTPException(status_code=404, detail="session not found")
path = _resolve_shard(decky, service or "", shard_name or "")
if not path.is_file():
# Fast path: the Log row carries a fields.shard_path we can validate
# and hit directly. Falls back to scanning all shards when the SD
# didn't include one (current sessrec.c doesn't emit shard_path) or
# when the named shard isn't on disk anymore.
path: Path | None = None
if _SHARD_BASENAME_RE.fullmatch(shard_name or ""):
candidate = _resolve_shard(decky, service or "", shard_name)
if candidate.is_file():
path = candidate
if path is None:
path = _find_shard_with_sid(decky, service or "", sid)
if path is None:
raise HTTPException(status_code=404, detail="transcript not found")
index, _size = _get_index(path)