From 7b7999c176bb707bc31825b0032a1e17effa79ff Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis Date: Tue, 3 Feb 2026 15:43:21 +0200 Subject: [PATCH 1/4] Fix Arrow validity buffer lifetime in PyQuery --- tiledb/core.cc | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tiledb/core.cc b/tiledb/core.cc index 98c0320cba..05887b3004 100644 --- a/tiledb/core.cc +++ b/tiledb/core.cc @@ -1524,6 +1524,14 @@ class PyQuery { for (auto& buffer_name : buffers_order_) { BufferInfo& buffer_info = buffers_.at(buffer_name); + // Convert validity to bitmap BEFORE creating BufferHolder + int64_t null_count = 0; + if (is_nullable(buffer_name)) { + null_count = count_zeros(buffer_info.validity); + buffer_info.validity = uint8_bool_to_uint8_bitmap( + buffer_info.validity); + } + auto buffer_holder = new BufferHolder( buffer_info.data, buffer_info.validity, buffer_info.offsets); @@ -1538,11 +1546,7 @@ class PyQuery { buffer_holder); if (is_nullable(buffer_name)) { - // count zeros before converting to bitmap - c_pa_array.null_count = count_zeros(buffer_info.validity); - // convert to bitmap - buffer_info.validity = uint8_bool_to_uint8_bitmap( - buffer_info.validity); + c_pa_array.null_count = null_count; c_pa_array.buffers[0] = buffer_info.validity.data(); c_pa_array.n_buffers = is_var(buffer_name) ? 3 : 2; c_pa_schema.flags |= ARROW_FLAG_NULLABLE; From aa1b5ff07f1c45f67608e6b167fc8f59fc631b8c Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis Date: Tue, 3 Feb 2026 15:43:25 +0200 Subject: [PATCH 2/4] Add test --- tiledb/tests/test_core.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tiledb/tests/test_core.py b/tiledb/tests/test_core.py index f25ff466b7..cbe5aa43fc 100644 --- a/tiledb/tests/test_core.py +++ b/tiledb/tests/test_core.py @@ -2,6 +2,7 @@ import random import numpy as np +import pytest from numpy.testing import assert_array_equal import tiledb @@ -157,3 +158,31 @@ def test_import_buffer(self): self.assertTrue("foo" in r) self.assertTrue("str" not in r) del q + + def test_nullable_arrow_buffer(self): + # BufferHolder must hold reference to converted bitmap, not original. + # Corrupted validity buffer causes wrong null positions in .to_pandas(). + pyarrow = pytest.importorskip("pyarrow") + + def _read_arrow(self, uri): + with tiledb.open(uri, "r") as A: + q = core.PyQuery(A.ctx, A, ("a",), (), 0, True) + sub = tiledb.Subarray(A) + sub.add_dim_range(0, (0, 4)) + q.set_subarray(sub) + q.submit() + return q._buffers_to_pa_table() + + uri = self.path("test_nullable_arrow_buffer") + dom = tiledb.Domain(tiledb.Dim("d", domain=(0, 4), tile=1, dtype=np.uint64)) + attr = tiledb.Attr("a", dtype="ascii", var=True, nullable=True) + tiledb.Array.create( + uri, tiledb.ArraySchema(domain=dom, attrs=[attr], sparse=True) + ) + + with tiledb.open(uri, "w") as A: + A[np.arange(5)] = {"a": pyarrow.array(["x", "y", None, None, ""])} + + df = self._read_arrow(uri).to_pandas() + + assert df["a"].isna().tolist() == [False, False, True, True, False] From 39f7682ced71d4d182c6424e21fd7c3717e7a511 Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis Date: Tue, 3 Feb 2026 15:55:34 +0200 Subject: [PATCH 3/4] Fix --- tiledb/tests/test_core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tiledb/tests/test_core.py b/tiledb/tests/test_core.py index cbe5aa43fc..14321ca522 100644 --- a/tiledb/tests/test_core.py +++ b/tiledb/tests/test_core.py @@ -164,7 +164,7 @@ def test_nullable_arrow_buffer(self): # Corrupted validity buffer causes wrong null positions in .to_pandas(). pyarrow = pytest.importorskip("pyarrow") - def _read_arrow(self, uri): + def _read_arrow(uri): with tiledb.open(uri, "r") as A: q = core.PyQuery(A.ctx, A, ("a",), (), 0, True) sub = tiledb.Subarray(A) @@ -183,6 +183,6 @@ def _read_arrow(self, uri): with tiledb.open(uri, "w") as A: A[np.arange(5)] = {"a": pyarrow.array(["x", "y", None, None, ""])} - df = self._read_arrow(uri).to_pandas() + df = _read_arrow(uri).to_pandas() assert df["a"].isna().tolist() == [False, False, True, True, False] From 7453458b98c6351ec776ce2a042eb9e2dadcd429 Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis Date: Wed, 4 Feb 2026 11:16:27 +0200 Subject: [PATCH 4/4] Skip test when pandas not installed --- tiledb/tests/test_core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tiledb/tests/test_core.py b/tiledb/tests/test_core.py index 14321ca522..b236a0f879 100644 --- a/tiledb/tests/test_core.py +++ b/tiledb/tests/test_core.py @@ -162,6 +162,7 @@ def test_import_buffer(self): def test_nullable_arrow_buffer(self): # BufferHolder must hold reference to converted bitmap, not original. # Corrupted validity buffer causes wrong null positions in .to_pandas(). + pytest.importorskip("pandas") pyarrow = pytest.importorskip("pyarrow") def _read_arrow(uri):