From 5cb87fffa873be808ecfcc796522f99d4db52dec Mon Sep 17 00:00:00 2001 From: Arun Sharma Date: Mon, 22 Jun 2026 21:56:28 -0700 Subject: [PATCH] fix: SIGSEGV due to out of bounds access after delete + checkpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Root cause After relationship deletes followed by a checkpoint, an in-memory CSR index entry (csrIndex->indices[nodeOffset]) can retain a stale INVALID_ROW_IDX (UINT64_MAX) sentinel row. Passing that through getQuotientRemainder(row, CHUNKED_NODE_GROUP_CAPACITY) produces an out-of-range chunk-group index, which truncates to UINT32_MAX in getGroup(idx_t) — an out-of-bounds std::vector access guarded only by a debug DASSERT (a no-op in release), so callers dereference a null/OOB group. The sentinel survives because checkpointInMemAndOnDisk calls collectLeafRegionsAndCSRLength (which runs collectInMemRegionChangesAndUpdateHeaderLength on all leaf regions, writing INVALID_ROW_IDX via setInvalid(i)), but then takes an early-return path (line 538, regionsToCheckpoint.empty()) without calling finalizeCheckpoint — so the index is left with stale sentinels and the chunked groups are not cleared. ### Fix src/storage/table/csr_node_group.cpp — bounds-check chunkIdx against chunkedGroups.getNumGroups(lock) before every unguarded getGroup call site, treating stale rows as skip/deleted (consistent with the two already-guarded sites at lines 851 and 1060). The 6 fixed sites: 1. scanCommittedInMemSequential → return empty result 2. scanCommittedInMemRandom → skip row 3. update (COMMITTED_IN_MEMORY) → no-op 4. delete_ (COMMITTED_IN_MEMORY) → no-op (return false) 5. collectInMemRegionChangesAndUpdateHeaderLength → treat as deleted, set invalid 6. populateCSRLengthInMemOnly → treat as deleted, decrement length --- src/storage/table/csr_node_group.cpp | 43 +++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/src/storage/table/csr_node_group.cpp b/src/storage/table/csr_node_group.cpp index cdb20ca6ea..7818a7ee7d 100644 --- a/src/storage/table/csr_node_group.cpp +++ b/src/storage/table/csr_node_group.cpp @@ -290,6 +290,12 @@ NodeGroupScanResult CSRNodeGroup::scanCommittedInMemSequential(const Transaction const ChunkedNodeGroup* chunkedGroup = nullptr; { const auto lock = chunkedGroups.lock(); + // A stale CSR index entry (e.g. an INVALID_ROW_IDX sentinel) can yield an + // out-of-range chunk group index. Guard against it instead of dereferencing + // a null/out-of-bounds group. See LadybugDB/ladybug#611. + if (chunkIdx >= chunkedGroups.getNumGroups(lock)) { + return NODE_GROUP_SCAN_EMPTY_RESULT; + } chunkedGroup = chunkedGroups.getGroup(lock, chunkIdx); } chunkedGroup->scan(transaction, tableState, nodeGroupScanState, startRowInChunk, numRows); @@ -315,8 +321,15 @@ NodeGroupScanResult CSRNodeGroup::scanCommittedInMemRandom(const Transaction* tr auto [chunkIdx, rowInChunk] = StorageUtils::getQuotientRemainder(rowIdx, StorageConfig::CHUNKED_NODE_GROUP_CAPACITY); if (chunkIdx != currentChunkIdx) { - currentChunkIdx = chunkIdx; const auto lock = chunkedGroups.lock(); + // A stale CSR index entry (e.g. an INVALID_ROW_IDX sentinel) can yield an + // out-of-range chunk group index. Skip such rows instead of dereferencing + // a null/out-of-bounds group. See LadybugDB/ladybug#611. + if (chunkIdx >= chunkedGroups.getNumGroups(lock)) { + nextRow++; + continue; + } + currentChunkIdx = chunkIdx; chunkedGroup = chunkedGroups.getGroup(lock, chunkIdx); } DASSERT(chunkedGroup); @@ -409,6 +422,11 @@ void CSRNodeGroup::update(const Transaction* transaction, CSRNodeGroupScanSource auto [chunkIdx, rowInChunk] = StorageUtils::getQuotientRemainder(rowIdxInGroup, StorageConfig::CHUNKED_NODE_GROUP_CAPACITY); const auto lock = chunkedGroups.lock(); + // A stale CSR index entry can yield an out-of-range chunk group index; treat + // the update as a no-op in that case. See LadybugDB/ladybug#611. + if (chunkIdx >= chunkedGroups.getNumGroups(lock)) { + return; + } const auto chunkedGroup = chunkedGroups.getGroup(lock, chunkIdx); return chunkedGroup->update(transaction, rowInChunk, columnID, propertyVector); } @@ -431,6 +449,11 @@ bool CSRNodeGroup::delete_(const Transaction* transaction, CSRNodeGroupScanSourc auto [chunkIdx, rowInChunk] = StorageUtils::getQuotientRemainder(rowIdxInGroup, StorageConfig::CHUNKED_NODE_GROUP_CAPACITY); const auto lock = chunkedGroups.lock(); + // A stale CSR index entry can yield an out-of-range chunk group index; treat + // the delete as a no-op in that case. See LadybugDB/ladybug#611. + if (chunkIdx >= chunkedGroups.getNumGroups(lock)) { + return false; + } const auto chunkedGroup = chunkedGroups.getGroup(lock, chunkIdx); return chunkedGroup->delete_(transaction, rowInChunk); } @@ -913,6 +936,15 @@ void CSRNodeGroup::collectInMemRegionChangesAndUpdateHeaderLength(const UniqLock const auto row = rows[i]; auto [chunkIdx, rowInChunk] = StorageUtils::getQuotientRemainder(row, StorageConfig::CHUNKED_NODE_GROUP_CAPACITY); + // A stale CSR index entry (e.g. an INVALID_ROW_IDX sentinel from a + // prior checkpoint pass) can yield an out-of-range chunk group index. + // Treat it as already deleted and skip. See LadybugDB/ladybug#611. + if (chunkIdx >= chunkedGroups.getNumGroups(lock)) { + csrIndex->indices[nodeOffset].turnToNonSequential(); + csrIndex->indices[nodeOffset].setInvalid(i); + numInMemDeletionsInCSR++; + continue; + } const auto chunkedGroup = chunkedGroups.getGroup(lock, chunkIdx); if (chunkedGroup->isDeleted(txn, rowInChunk)) { csrIndex->indices[nodeOffset].turnToNonSequential(); @@ -1117,6 +1149,15 @@ void CSRNodeGroup::populateCSRLengthInMemOnly(const UniqLock& lock, offset_t num const auto row = rows[i]; auto [chunkIdx, rowInChunk] = StorageUtils::getQuotientRemainder(row, StorageConfig::CHUNKED_NODE_GROUP_CAPACITY); + // A stale CSR index entry (e.g. an INVALID_ROW_IDX sentinel from a prior + // checkpoint pass) can yield an out-of-range chunk group index. Treat it + // as already deleted. See LadybugDB/ladybug#611. + if (chunkIdx >= chunkedGroups.getNumGroups(lock)) { + csrIndex->indices[offset].turnToNonSequential(); + csrIndex->indices[offset].setInvalid(i); + lengthAfterDelete--; + continue; + } const auto chunkedGroup = chunkedGroups.getGroup(lock, chunkIdx); const auto isDeleted = chunkedGroup->isDeleted(txn, rowInChunk); if (isDeleted) {