fix(web/transcripts): fall back to shard-scan when Log row has no shard_path
sessrec.c emits the session_recorded SD blob with sid/service/src_ip/
duration_s/bytes/truncated — it never emitted shard_path. The web
handler still asked for fields.shard_path, got "", tripped the
sessions-YYYY-MM-DD.jsonl basename regex and returned
400 "invalid shard name" for every legitimate transcript request.
Handler now:
- Takes the fast path when fields.shard_path IS present and passes the
shard-basename validation (for any future emitter or ingester that
backfills it).
- Otherwise enumerates sessions-YYYY-MM-DD.jsonl shards under
ARTIFACTS_ROOT/{decky}/{service}/transcripts/ (newest first) and
returns the first one whose per-sid index contains our sid.
- Security invariant preserved: only files whose basename matches the
_SHARD_BASENAME_RE are ever opened, and they always resolve inside
ARTIFACTS_ROOT. A forged fields.shard_path is silently ignored.
- Soft-fails OSError/PermissionError on the transcripts dir (decky
containers often write it with a uid the API can't read) — returns
404 instead of a 500 traceback.
test_forged_shard_path_blocked is renamed to
test_forged_shard_path_is_ignored_in_favour_of_scan and updated to match
the new semantics: the forgery is ignored and the real shard is served
via the fallback. The invariant (no /etc/passwd access) is still
asserted indirectly: the status is 200 and the returned data comes from
the test shard.
This commit is contained in:
@@ -166,9 +166,16 @@ async def test_decky_mismatch_rejected(client: httpx.AsyncClient, auth_token: st
|
||||
assert res.status_code == 404
|
||||
|
||||
|
||||
async def test_forged_shard_path_blocked(client: httpx.AsyncClient, auth_token: str, shard):
|
||||
# A Log row with a shard_path basename that doesn't match sessions-YYYY-MM-DD
|
||||
# must be rejected even if the sid lookup succeeds.
|
||||
async def test_forged_shard_path_is_ignored_in_favour_of_scan(
|
||||
client: httpx.AsyncClient, auth_token: str, shard,
|
||||
):
|
||||
# A Log row with a shard_path basename that doesn't match
|
||||
# sessions-YYYY-MM-DD is silently ignored — the handler falls back
|
||||
# to scanning the decky's transcripts dir for a shard containing
|
||||
# the sid. The security invariant holds either way: only files
|
||||
# whose basename matches _SHARD_BASENAME_RE are ever opened, and
|
||||
# they always resolve under ARTIFACTS_ROOT/decky/<service>/
|
||||
# transcripts/.
|
||||
row = _log_row(_SID_A, _DECKY, "ssh", "/etc/passwd")
|
||||
with patch("decnet.web.router.transcripts.api_get_transcript.repo") as mock_repo:
|
||||
mock_repo.get_session_log = AsyncMock(return_value=row)
|
||||
@@ -176,7 +183,15 @@ async def test_forged_shard_path_blocked(client: httpx.AsyncClient, auth_token:
|
||||
f"/api/v1/transcripts/{_DECKY}/{_SID_A}",
|
||||
headers={"Authorization": f"Bearer {auth_token}"},
|
||||
)
|
||||
assert res.status_code == 400
|
||||
# Fallback located the real shard and returned it. /etc/passwd was
|
||||
# never opened (different basename shape, wrong dir).
|
||||
assert res.status_code == 200
|
||||
body = res.json()
|
||||
assert body["sid"] == _SID_A
|
||||
# Sanity: the events came from the test shard, not from a system
|
||||
# file — our fixture events have string `d` fields that /etc/passwd
|
||||
# would never reproduce.
|
||||
assert all(isinstance(evt[2], str) for evt in body["events"])
|
||||
|
||||
|
||||
async def test_limit_ceiling_enforced(client: httpx.AsyncClient, auth_token: str, shard):
|
||||
|
||||
Reference in New Issue
Block a user