fix(web/transcripts): fall back to shard-scan when Log row has no shard_path
sessrec.c emits the session_recorded SD blob with sid/service/src_ip/
duration_s/bytes/truncated — it never emitted shard_path. The web
handler still asked for fields.shard_path, got "", tripped the
sessions-YYYY-MM-DD.jsonl basename regex and returned
400 "invalid shard name" for every legitimate transcript request.
Handler now:
- Fast-paths when fields.shard_path IS present and validates
(for any future emitter or ingester that backfills it).
- Otherwise enumerates sessions-YYYY-MM-DD.jsonl shards under
ARTIFACTS_ROOT/{decky}/{service}/transcripts/ (newest first) and
returns the first one whose per-sid index contains our sid.
- Security invariant preserved: only files whose basename matches the
_SHARD_BASENAME_RE are ever opened, and they always resolve inside
ARTIFACTS_ROOT. A forged fields.shard_path is silently ignored.
- Soft-fails OSError/PermissionError on the transcripts dir (decky
containers often write it with a uid the API can't read) — returns
404 instead of a 500 traceback.
test_forged_shard_path_blocked updated to match the new semantics:
forgery is ignored, the real shard is served via fallback. The
invariant (no /etc/passwd access) is still asserted by the fact
that status is 200 with data from the test shard.
This commit is contained in:
@@ -72,11 +72,15 @@ def _get_index(path: Path) -> tuple[dict[str, list[tuple[int, int]]], int]:
|
|||||||
return index, st.st_size
|
return index, st.st_size
|
||||||
|
|
||||||
|
|
||||||
def _resolve_shard(decky: str, service: str, shard_name: str) -> Path:
|
def _validate_names(decky: str, service: str) -> None:
|
||||||
if not _DECKY_RE.fullmatch(decky):
|
if not _DECKY_RE.fullmatch(decky):
|
||||||
raise HTTPException(status_code=400, detail="invalid decky name")
|
raise HTTPException(status_code=400, detail="invalid decky name")
|
||||||
if not _SERVICE_RE.fullmatch(service):
|
if not _SERVICE_RE.fullmatch(service):
|
||||||
raise HTTPException(status_code=400, detail="invalid service")
|
raise HTTPException(status_code=400, detail="invalid service")
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_shard(decky: str, service: str, shard_name: str) -> Path:
|
||||||
|
_validate_names(decky, service)
|
||||||
if not _SHARD_BASENAME_RE.fullmatch(shard_name):
|
if not _SHARD_BASENAME_RE.fullmatch(shard_name):
|
||||||
raise HTTPException(status_code=400, detail="invalid shard name")
|
raise HTTPException(status_code=400, detail="invalid shard name")
|
||||||
root = ARTIFACTS_ROOT.resolve()
|
root = ARTIFACTS_ROOT.resolve()
|
||||||
@@ -86,6 +90,46 @@ def _resolve_shard(decky: str, service: str, shard_name: str) -> Path:
|
|||||||
return candidate
|
return candidate
|
||||||
|
|
||||||
|
|
||||||
|
def _find_shard_with_sid(decky: str, service: str, sid: str) -> Path | None:
|
||||||
|
"""Scan every ``sessions-YYYY-MM-DD.jsonl`` under the decky's transcripts
|
||||||
|
dir until one claims this sid.
|
||||||
|
|
||||||
|
Fallback for rows where ``fields.shard_path`` is missing (current
|
||||||
|
sessrec.c does not emit it) or for sessions that span UTC midnight
|
||||||
|
(events land in two shards; the emitted SD could only name one).
|
||||||
|
Newest shards first — most transcript lookups are for recent
|
||||||
|
sessions. Result is cached by ``_get_index`` keyed on
|
||||||
|
(path, mtime), so repeated calls are ~free.
|
||||||
|
"""
|
||||||
|
_validate_names(decky, service)
|
||||||
|
root = ARTIFACTS_ROOT.resolve()
|
||||||
|
transcripts_dir = (root / decky / service / "transcripts").resolve()
|
||||||
|
if root not in transcripts_dir.parents:
|
||||||
|
return None
|
||||||
|
# Absent dir, or dir the API process can't stat/read — treat as
|
||||||
|
# "no transcript", not as a 500 traceback. Most commonly the decky
|
||||||
|
# container wrote this tree as a container-side uid that the API
|
||||||
|
# (running under --user / --group) can't cross.
|
||||||
|
try:
|
||||||
|
if not transcripts_dir.is_dir():
|
||||||
|
return None
|
||||||
|
entries = list(transcripts_dir.iterdir())
|
||||||
|
except (OSError, PermissionError):
|
||||||
|
return None
|
||||||
|
shards = sorted(
|
||||||
|
(p for p in entries if _SHARD_BASENAME_RE.fullmatch(p.name)),
|
||||||
|
reverse=True, # newest day first
|
||||||
|
)
|
||||||
|
for shard in shards:
|
||||||
|
try:
|
||||||
|
index, _size = _get_index(shard)
|
||||||
|
except (OSError, PermissionError):
|
||||||
|
continue
|
||||||
|
if sid in index:
|
||||||
|
return shard
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
@router.get(
|
@router.get(
|
||||||
"/transcripts/{decky}/{sid}",
|
"/transcripts/{decky}/{sid}",
|
||||||
tags=["Transcripts"],
|
tags=["Transcripts"],
|
||||||
@@ -126,8 +170,18 @@ async def get_transcript(
|
|||||||
if log_decky and log_decky != decky:
|
if log_decky and log_decky != decky:
|
||||||
raise HTTPException(status_code=404, detail="session not found")
|
raise HTTPException(status_code=404, detail="session not found")
|
||||||
|
|
||||||
path = _resolve_shard(decky, service or "", shard_name or "")
|
# Fast path: the Log row carries a fields.shard_path we can validate
|
||||||
if not path.is_file():
|
# and hit directly. Falls back to scanning all shards when the SD
|
||||||
|
# didn't include one (current sessrec.c doesn't emit shard_path) or
|
||||||
|
# when the named shard isn't on disk anymore.
|
||||||
|
path: Path | None = None
|
||||||
|
if _SHARD_BASENAME_RE.fullmatch(shard_name or ""):
|
||||||
|
candidate = _resolve_shard(decky, service or "", shard_name)
|
||||||
|
if candidate.is_file():
|
||||||
|
path = candidate
|
||||||
|
if path is None:
|
||||||
|
path = _find_shard_with_sid(decky, service or "", sid)
|
||||||
|
if path is None:
|
||||||
raise HTTPException(status_code=404, detail="transcript not found")
|
raise HTTPException(status_code=404, detail="transcript not found")
|
||||||
|
|
||||||
index, _size = _get_index(path)
|
index, _size = _get_index(path)
|
||||||
|
|||||||
@@ -166,9 +166,16 @@ async def test_decky_mismatch_rejected(client: httpx.AsyncClient, auth_token: st
|
|||||||
assert res.status_code == 404
|
assert res.status_code == 404
|
||||||
|
|
||||||
|
|
||||||
async def test_forged_shard_path_blocked(client: httpx.AsyncClient, auth_token: str, shard):
|
async def test_forged_shard_path_is_ignored_in_favour_of_scan(
|
||||||
# A Log row with a shard_path basename that doesn't match sessions-YYYY-MM-DD
|
client: httpx.AsyncClient, auth_token: str, shard,
|
||||||
# must be rejected even if the sid lookup succeeds.
|
):
|
||||||
|
# A Log row with a shard_path basename that doesn't match
|
||||||
|
# sessions-YYYY-MM-DD is silently ignored — the handler falls back
|
||||||
|
# to scanning the decky's transcripts dir for a shard containing
|
||||||
|
# the sid. The security invariant holds either way: only files
|
||||||
|
# whose basename matches _SHARD_BASENAME_RE are ever opened, and
|
||||||
|
# they always resolve under ARTIFACTS_ROOT/decky/<service>/
|
||||||
|
# transcripts/.
|
||||||
row = _log_row(_SID_A, _DECKY, "ssh", "/etc/passwd")
|
row = _log_row(_SID_A, _DECKY, "ssh", "/etc/passwd")
|
||||||
with patch("decnet.web.router.transcripts.api_get_transcript.repo") as mock_repo:
|
with patch("decnet.web.router.transcripts.api_get_transcript.repo") as mock_repo:
|
||||||
mock_repo.get_session_log = AsyncMock(return_value=row)
|
mock_repo.get_session_log = AsyncMock(return_value=row)
|
||||||
@@ -176,7 +183,15 @@ async def test_forged_shard_path_blocked(client: httpx.AsyncClient, auth_token:
|
|||||||
f"/api/v1/transcripts/{_DECKY}/{_SID_A}",
|
f"/api/v1/transcripts/{_DECKY}/{_SID_A}",
|
||||||
headers={"Authorization": f"Bearer {auth_token}"},
|
headers={"Authorization": f"Bearer {auth_token}"},
|
||||||
)
|
)
|
||||||
assert res.status_code == 400
|
# Fallback located the real shard and returned it. /etc/passwd was
|
||||||
|
# never opened (different basename shape, wrong dir).
|
||||||
|
assert res.status_code == 200
|
||||||
|
body = res.json()
|
||||||
|
assert body["sid"] == _SID_A
|
||||||
|
# Sanity: the events came from the test shard, not from a system
|
||||||
|
# file — our fixture events have string `d` fields that /etc/passwd
|
||||||
|
# would never reproduce.
|
||||||
|
assert all(isinstance(evt[2], str) for evt in body["events"])
|
||||||
|
|
||||||
|
|
||||||
async def test_limit_ceiling_enforced(client: httpx.AsyncClient, auth_token: str, shard):
|
async def test_limit_ceiling_enforced(client: httpx.AsyncClient, auth_token: str, shard):
|
||||||
|
|||||||
Reference in New Issue
Block a user