perf(ingester): batch log writes into bulk commits

The ingester now buffers rows until either DECNET_BATCH_SIZE rows
(default 100) have accumulated or DECNET_BATCH_MAX_WAIT_MS (default
250ms) has elapsed, whichever comes first, and then flushes them through
repo.add_logs — one transaction, one COMMIT per batch instead of per
row. Under attacker traffic this collapses N commits into ⌈N/100⌉ (at
the default batch size) and takes most of the SQLite writer-lock
contention off the hot path.

Flush semantics are cancel-safe: _position only advances after a batch
commits successfully, and the flush helper bails without touching the
DB if the enclosing task is being cancelled (lifespan teardown).
Un-flushed lines stay in the file and are re-read on next startup.

Tests updated to assert on add_logs (bulk) instead of the per-row
add_log that the ingester no longer uses, plus a new test asserting
that 250 lines flush in ≤5 add_logs calls.
This commit is contained in:
2026-04-17 16:37:34 -04:00
parent 11b9e85874
commit a10aee282f
3 changed files with 121 additions and 29 deletions

View File

@@ -93,6 +93,7 @@ class TestIngesterIsolation:
from decnet.web.ingester import log_ingestion_worker
mock_repo = MagicMock()
mock_repo.add_logs = AsyncMock()
mock_repo.get_state = AsyncMock(return_value=None)
mock_repo.set_state = AsyncMock()
iterations = 0
@@ -110,7 +111,7 @@ class TestIngesterIsolation:
await task
# Should have waited at least 2 iterations without crashing
assert iterations >= 2
mock_repo.add_log.assert_not_called()
mock_repo.add_logs.assert_not_called()
@pytest.mark.asyncio
async def test_ingester_survives_no_log_file_env(self):
@@ -135,6 +136,7 @@ class TestIngesterIsolation:
mock_repo = MagicMock()
mock_repo.add_log = AsyncMock()
mock_repo.add_logs = AsyncMock()
mock_repo.get_state = AsyncMock(return_value=None)
mock_repo.set_state = AsyncMock()
iterations = 0
@@ -150,7 +152,7 @@ class TestIngesterIsolation:
task = asyncio.create_task(log_ingestion_worker(mock_repo))
with pytest.raises(asyncio.CancelledError):
await task
mock_repo.add_log.assert_not_called()
mock_repo.add_logs.assert_not_called()
@pytest.mark.asyncio
async def test_ingester_exits_on_db_fatal_error(self, tmp_path):
@@ -171,15 +173,16 @@ class TestIngesterIsolation:
json_file.write_text(json.dumps(valid_record) + "\n")
mock_repo = MagicMock()
mock_repo.add_log = AsyncMock(side_effect=Exception("no such table: logs"))
mock_repo.add_log = AsyncMock()
mock_repo.add_logs = AsyncMock(side_effect=Exception("no such table: logs"))
mock_repo.get_state = AsyncMock(return_value=None)
mock_repo.set_state = AsyncMock()
with patch.dict(os.environ, {"DECNET_INGEST_LOG_FILE": str(tmp_path / "test.log")}):
# Worker should exit the loop on fatal DB error
await log_ingestion_worker(mock_repo)
# Should have attempted to add the log before dying
mock_repo.add_log.assert_awaited_once()
# Should have attempted to bulk-add before dying
mock_repo.add_logs.assert_awaited_once()
# ─── Attacker worker isolation ───────────────────────────────────────────────