diff --git a/decnet/orchestrator/worker.py b/decnet/orchestrator/worker.py index e2f463ba..909d270a 100644 --- a/decnet/orchestrator/worker.py +++ b/decnet/orchestrator/worker.py @@ -385,7 +385,7 @@ async def _bump_synthetic_file_after_edit(repo, action, result) -> None: patch["content_hash"] = hashlib.sha256( new_body.encode("utf-8"), ).hexdigest() - patch["last_body"] = new_body[:65536] + patch["last_body"] = new_body await repo.update_synthetic_file(action.synthetic_file_uuid, patch) @@ -411,10 +411,7 @@ async def _record_synthetic_file(repo, action) -> None: "last_modified": now, "edit_count": 0, "content_hash": content_hash, - # Cap the persisted body — large blobs (DOCX/PDF/canary - # artifacts in stage 7) are wasted disk on this side; the - # decky filesystem holds the canonical bytes. - "last_body": body[:65536], + "last_body": body, } try: await repo.record_synthetic_file(row) @@ -432,7 +429,7 @@ async def _record_synthetic_file(repo, action) -> None: { "last_modified": now, "content_hash": content_hash, - "last_body": body[:65536], + "last_body": body, "edit_count": int(match.get("edit_count", 0)) + 1, }, ) diff --git a/decnet/web/db/models/realism.py b/decnet/web/db/models/realism.py index f7fba84a..0eaea29b 100644 --- a/decnet/web/db/models/realism.py +++ b/decnet/web/db/models/realism.py @@ -22,6 +22,15 @@ from sqlalchemy import Column, Index, Text, UniqueConstraint from sqlmodel import Field, SQLModel +SYNTHETIC_FILE_BODY_LIMIT = 65536 +"""Cap on persisted ``synthetic_files.last_body`` bytes. + +Enforced by the repo on both insert and update — callers may pass the +full body; the repo clips. Large blobs (DOCX/PDF, canary artifacts) are +wasted disk on the master side; the decky filesystem holds the canonical +bytes.""" + + class SyntheticFile(SQLModel, table=True): """One realism-planted file on one decky. diff --git a/decnet/web/db/sqlmodel_repo.py b/decnet/web/db/sqlmodel_repo.py index 175213d9..6b7eb9ec 100644 --- a/decnet/web/db/sqlmodel_repo.py +++ b/decnet/web/db/sqlmodel_repo.py @@ -3335,6 +3335,9 @@ class SQLModelRepository(BaseRepository): # ------------------------------------------------------------ realism async def record_synthetic_file(self, data: dict[str, Any]) -> str: + from decnet.web.db.models.realism import SYNTHETIC_FILE_BODY_LIMIT + if "last_body" in data and data["last_body"] is not None: + data = {**data, "last_body": data["last_body"][:SYNTHETIC_FILE_BODY_LIMIT]} async with self._session() as session: row = SyntheticFile(**data) session.add(row) @@ -3345,6 +3348,9 @@ class SQLModelRepository(BaseRepository): async def update_synthetic_file( self, row_uuid: str, data: dict[str, Any], ) -> None: + from decnet.web.db.models.realism import SYNTHETIC_FILE_BODY_LIMIT + if "last_body" in data and data["last_body"] is not None: + data = {**data, "last_body": data["last_body"][:SYNTHETIC_FILE_BODY_LIMIT]} async with self._session() as session: stmt = ( update(SyntheticFile) diff --git a/tests/realism/test_synthetic_files_truncation.py b/tests/realism/test_synthetic_files_truncation.py index 85bf85b0..eb7e7438 100644 --- a/tests/realism/test_synthetic_files_truncation.py +++ b/tests/realism/test_synthetic_files_truncation.py @@ -1,16 +1,12 @@ -"""``synthetic_files.last_body`` is capped at 64 KB. +"""``synthetic_files.last_body`` is capped at 64 KB by the repo. -The orchestrator caps the persisted body at 64 KB on every write -(create + edit) so the table doesn't bloat with large blobs. This -introduces a real edge: an EditAction whose ``previous_body`` is -sourced from the cap (not the file on disk) sees truncated bytes. +The repo clips on both insert and update so callers may pass the full +body. Large blobs (DOCX/PDF, canary artifacts) would bloat the table; +the decky filesystem holds the canonical bytes. -Today the realism templates produce well under 64 KB, so the edge -isn't reachable from the planted-content path. But a future change -that lifts the cap, an LLM that returns a long body, or a -``honeydoc_pdf`` body cultivated through the realism path could all -hit it. These tests pin the contract so a regression that drops the -cap or applies it inconsistently fails loudly. +These tests pin the contract so a regression that drops the cap or +applies it inconsistently fails loudly. Note: callers pass the *full* +body — the worker no longer clips; the repo does. """ from __future__ import annotations @@ -19,10 +15,11 @@ from datetime import datetime, timezone import pytest import pytest_asyncio +from decnet.web.db.models.realism import SYNTHETIC_FILE_BODY_LIMIT from decnet.web.db.sqlite.repository import SQLiteRepository -_LIMIT = 65536 # decnet/orchestrator/worker.py uses [:65536] +_LIMIT = SYNTHETIC_FILE_BODY_LIMIT @pytest_asyncio.fixture @@ -44,16 +41,14 @@ def _row(body: str) -> dict: "created_at": now, "last_modified": now, "edit_count": 0, - # The hash is over the *full* body in the orchestrator's write - # path; if the body comes from a row that was already truncated, - # the hash reflects the truncation. Tests check both paths. "content_hash": hashlib.sha256(body.encode("utf-8")).hexdigest(), - "last_body": body[:_LIMIT], + # Caller passes the full body — the repo clips. + "last_body": body, } @pytest.mark.asyncio -async def test_oversized_body_is_truncated_at_write(repo): +async def test_repo_clips_oversized_body_at_insert(repo): body = "A" * (_LIMIT * 2) uuid = await repo.record_synthetic_file(_row(body)) rows = await repo.list_synthetic_files(decky_uuid="d1") @@ -65,8 +60,6 @@ async def test_oversized_body_is_truncated_at_write(repo): @pytest.mark.asyncio async def test_body_at_exact_limit_is_preserved(repo): - """Boundary: a body of exactly 64 KB must not be silently - truncated. Off-by-one regression target.""" body = "B" * _LIMIT await repo.record_synthetic_file(_row(body)) rows = await repo.list_synthetic_files(decky_uuid="d1") @@ -74,27 +67,16 @@ async def test_body_at_exact_limit_is_preserved(repo): @pytest.mark.asyncio -async def test_pick_for_edit_returns_truncated_body(repo): - """Stage 3b contract: the edit candidate carries the *stored* - last_body — necessarily truncated when the original exceeded the - cap. Document the consequence so a future test author doesn't - expect the full body to round-trip.""" +async def test_pick_for_edit_returns_clipped_body(repo): body = "C" * (_LIMIT * 3) await repo.record_synthetic_file(_row(body)) candidate = await repo.pick_random_synthetic_file_for_edit("d1") assert candidate is not None assert len(candidate["last_body"]) == _LIMIT - # The edit driver mutates this body via realism.bodies.next_iteration, - # so callers must accept they're editing a truncated snapshot of - # the file that's actually on the decky. This is documented - # behaviour pre-v1; if the cap rises, lift _LIMIT here too. @pytest.mark.asyncio -async def test_edit_path_keeps_cap(repo): - """An update_synthetic_file call that tries to write a >cap body - must clip to the cap on the way in. Mirrors the orchestrator - worker's ``last_body=body[:65536]`` line.""" +async def test_repo_clips_oversized_body_at_update(repo): uuid = await repo.record_synthetic_file(_row("seed")) big = "D" * (_LIMIT * 4) await repo.update_synthetic_file( @@ -102,7 +84,7 @@ async def test_edit_path_keeps_cap(repo): { "last_modified": datetime.now(timezone.utc), "edit_count": 1, - "last_body": big[:_LIMIT], # caller is responsible for clipping + "last_body": big, }, ) rows = await repo.list_synthetic_files(decky_uuid="d1")