7 changes: 7 additions & 0 deletions .github/workflows/ci.yml
@@ -156,6 +156,13 @@ jobs:
           cd $RUNNER_TEMP
           pytest -vv --showlocals $PROJECT_CWD

+      - name: "Re-run tests with pandas 2"
+        run: |
+          PROJECT_CWD=$PWD
+          pip install "pandas>=2,<3"
+          cd $RUNNER_TEMP
+          pytest -vv --showlocals $PROJECT_CWD
+
       - name: "Re-run tests without pandas"
         run: |
           PROJECT_CWD=$PWD
13 changes: 3 additions & 10 deletions examples/incomplete_iteration.py
@@ -40,21 +40,14 @@


 def check_dataframe_deps():
-    pd_error = """Pandas version >= 1.0 and < 3.0 required for dataframe functionality.
-    Please `pip install pandas>=1.0,<3.0` to proceed."""
+    pd_error = """Pandas is required for dataframe functionality.
+    Please `pip install pandas` to proceed."""

     try:
-        import pandas as pd
+        import pandas
     except ImportError:
         raise Exception(pd_error)

-    from packaging.version import Version
-
-    if Version(pd.__version__) < Version("1.0") or Version(pd.__version__) >= Version(
-        "3.0.0.dev0"
-    ):
-        raise Exception(pd_error)
-

 # Name of the array to create.
 array_name = "incomplete_iteration"
13 changes: 3 additions & 10 deletions examples/parallel_csv_ingestion.py
@@ -49,21 +49,14 @@


 def check_dataframe_deps():
-    pd_error = """Pandas version >= 1.0 and < 3.0 required for dataframe functionality.
-    Please `pip install pandas>=1.0,<3.0` to proceed."""
+    pd_error = """Pandas is required for dataframe functionality.
+    Please `pip install pandas` to proceed."""

     try:
-        import pandas as pd
+        import pandas
     except ImportError:
         raise Exception(pd_error)

-    from packaging.version import Version
-
-    if Version(pd.__version__) < Version("1.0") or Version(pd.__version__) >= Version(
-        "3.0.0.dev0"
-    ):
-        raise Exception(pd_error)
-

 def generate_csvs(csv_folder, count=9, min_length=1, max_length=109):
     def make_dataframe(col_size):
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -57,7 +57,7 @@ test = [
     "hypothesis",
     "psutil",
     "pyarrow",
-    "pandas<3",
+    "pandas",
     "dask[distributed]",
 ]
@@ -118,6 +118,6 @@ test-requires = [
     "hypothesis",
     "psutil",
     "pyarrow",
-    "pandas<3",
+    "pandas",
 ]
 test-command = "pytest {project}"
53 changes: 28 additions & 25 deletions tiledb/dataframe_.py
@@ -15,28 +15,18 @@


 def check_dataframe_deps():
-    pd_error = """Pandas version >= 1.0 and < 3.0 required for dataframe functionality.
-    Please `pip install pandas>=1.0,<3.0` to proceed."""
-    pa_error = """PyArrow version >= 1.0 is suggested for dataframe functionality.
-    Please `pip install pyarrow>=1.0`."""
+    pd_error = """Pandas is required for dataframe functionality.
+    Please `pip install pandas` to proceed."""
+    pa_error = """PyArrow is suggested for dataframe functionality.
+    Please `pip install pyarrow`."""

     try:
-        import pandas as pd
+        import pandas
     except ImportError:
         raise Exception(pd_error)

-    from packaging.version import Version
-
-    if Version(pd.__version__) < Version("1.0") or Version(pd.__version__) >= Version(
-        "3.0.0.dev0"
-    ):
-        raise Exception(pd_error)
-
     try:
-        import pyarrow as pa
-
-        if Version(pa.__version__) < Version("1.0"):
-            warnings.warn(pa_error)
+        import pyarrow
     except ImportError:
         warnings.warn(pa_error)

@@ -154,7 +144,7 @@ class ColumnInfo:

     @classmethod
     def from_values(cls, array_like, varlen_types=()):
-        from pandas import CategoricalDtype
+        from pandas import CategoricalDtype, StringDtype
         from pandas.api import types as pd_types

         if pd_types.is_object_dtype(array_like):
@@ -171,6 +161,16 @@ def from_values(cls, array_like, varlen_types=()):
             raise NotImplementedError(
                 f"{inferred_dtype} inferred dtype not supported (column {array_like.name})"
             )
+        elif hasattr(array_like, "dtype") and isinstance(array_like.dtype, StringDtype):
+            # Explicit pd.StringDtype() (name="string") is always nullable;
+            # auto-inferred str (name="str") depends on data
+            explicit = array_like.dtype.name == "string"
+            return cls(
+                np.dtype(np.str_),
+                repr="string" if explicit else None,
+                var=True,
+                nullable=explicit or bool(array_like.isna().any()),
+            )
         elif hasattr(array_like, "dtype") and isinstance(
             array_like.dtype, CategoricalDtype
         ):
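Note: the name-based branch above is what separates pandas 3's auto-inferred `str` dtype from the explicit `string` dtype. A minimal sketch of the distinction, assuming pandas >= 3 (on pandas 2 plain Python strings still infer as object, so only the explicit case reaches this branch):

    import pandas as pd

    inferred = pd.Series(["a", "b"])                   # pandas 3 infers StringDtype, name == "str"
    explicit = pd.Series(["a", "b"], dtype="string")   # explicit StringDtype, name == "string"

    print(inferred.dtype.name)                         # "str"    -> NaN-backed, nullable only if data has NAs
    print(explicit.dtype.name)                         # "string" -> pd.NA-backed, always nullable
    print(isinstance(inferred.dtype, pd.StringDtype))  # True on pandas 3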
@@ -211,6 +211,14 @@ def from_dtype(cls, dtype, column_name, varlen_types=()):
         dtype = pd_types.pandas_dtype(dtype)
         # Note: be careful if you rearrange the order of the following checks

+        # pandas StringDtype (auto-inferred 'str' and explicit 'string')
+        from pandas import StringDtype
+
+        if isinstance(dtype, StringDtype):
+            repr_val = "string" if dtype.name == "string" else None
+            nullable = dtype.name == "string"
+            return cls(np.dtype(np.str_), repr=repr_val, var=True, nullable=nullable)
+
         # extension types
         if pd_types.is_extension_array_dtype(dtype):
             if libtiledb_version() < (2, 10) and pd_types.is_bool_dtype(dtype):
@@ -255,12 +263,7 @@ def from_dtype(cls, dtype, column_name, varlen_types=()):

         # datetime types
         if pd_types.is_datetime64_any_dtype(dtype):
-            if dtype == "datetime64[ns]":
-                return cls(dtype)
-            else:
-                raise NotImplementedError(
-                    f"Only 'datetime64[ns]' datetime dtype is supported (column {column_name})"
-                )
+            return cls(dtype)

         # string types
         # don't use pd_types.is_string_dtype() because it includes object types too
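Note: dropping the ns-only guard matches pandas 2+, where a Series can carry non-nanosecond datetime64 units directly. A quick sketch of a dtype that now flows through (assumes pandas >= 2; pandas 1.x coerced such arrays to datetime64[ns]):

    import numpy as np
    import pandas as pd

    s = pd.Series(np.array(["2024-01-01", "2024-01-02"], dtype="datetime64[ms]"))
    print(s.dtype)                                  # datetime64[ms], unit preserved on pandas >= 2
    print(pd.api.types.is_datetime64_any_dtype(s))  # True, so from_dtype now accepts it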
@@ -517,8 +520,8 @@ def _df_to_np_arrays(df, column_infos, fillna):
         if not column_info.var:
             to_numpy_kwargs.update(dtype=column_info.dtype)

-        if column_info.nullable:
-            # use default 0/empty for the dtype
+        if column_info.nullable and column.isna().any():
+            # Only create nullmap if data actually has nulls
             to_numpy_kwargs.update(na_value=column_info.dtype.type())
             nullmaps[name] = (~column.isna()).to_numpy(dtype=np.uint8)
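Note: with the tightened guard, an all-valid nullable column skips both the `na_value` fill and the validity bitmap. A minimal sketch of the two paths, using Int64 as a stand-in for any nullable extension dtype:

    import numpy as np
    import pandas as pd

    clean = pd.Series([1, 2, 3], dtype="Int64")
    print(clean.isna().any())                          # False -> no nullmap is built

    holey = pd.Series([1, pd.NA], dtype="Int64")
    print(holey.to_numpy(dtype=np.int64, na_value=0))  # [1 0], NA filled with the dtype default
    print((~holey.isna()).to_numpy(dtype=np.uint8))    # [1 0], the validity mask handed to TileDB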
8 changes: 8 additions & 0 deletions tiledb/dense_array.py
@@ -481,6 +481,14 @@ def _setitem_impl(self, selection, val, nullmaps: dict):

         try:
             if attr.isvar:
+                # Capture null mask before np.asarray() loses pandas NA info
+                if (
+                    attr.isnullable
+                    and name not in nullmaps
+                    and hasattr(attr_val, "isna")
+                ):
+                    nullmaps[name] = (~attr_val.isna()).to_numpy(dtype=np.uint8)
+
                 # ensure that the value is array-convertible, for example: pandas.Series
                 attr_val = np.asarray(attr_val)
                 if attr.isnullable and name not in nullmaps:
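Note on the early capture: once a nullable pandas column passes through np.asarray(), the typed NA mask is gone; missing values become ordinary elements of a plain ndarray. A small sketch of why the mask is taken first, using a nullable string column:

    import numpy as np
    import pandas as pd

    s = pd.Series(["x", pd.NA, "z"], dtype="string")
    mask = (~s.isna()).to_numpy(dtype=np.uint8)  # [1 0 1], captured while the NA info is typed
    arr = np.asarray(s)                          # object ndarray; the NA is now just another element
    print(mask, arr)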
11 changes: 10 additions & 1 deletion tiledb/multirange_indexing.py
@@ -890,7 +890,16 @@ def _update_df_from_meta(
             col_dtypes[name] = dtype

         if col_dtypes:
-            df = df.astype(col_dtypes, copy=False)
+            # '<U0' is stored in __pandas_index_dims metadata for var-length string
+            # dimensions (str(np.dtype(np.str_)) == '<U0'). Applying astype('<U0')
+            # was a no-op on pandas 2 but on pandas 3 it forces StringDtype back to
+            # object, breaking the roundtrip. The string data already has the correct
+            # dtype from pandas' own inference, so we skip it here.
+            col_dtypes = {
+                name: dtype for name, dtype in col_dtypes.items() if dtype != "<U0"
+            }
+            if col_dtypes:
+                df = df.astype(col_dtypes)

         if index_col:
             if index_col is not True:
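Note: '<U0' is the string form of numpy's zero-length flexible string dtype, which is why the filter can be a simple comparison. A quick check of the values involved (the pandas 3 coercion described in the comment is the behavior being worked around):

    import numpy as np
    import pandas as pd

    print(str(np.dtype(np.str_)))  # '<U0', the value stored in __pandas_index_dims
    s = pd.Series(["a", "bb"])     # pandas 3: StringDtype; pandas 2: object
    # s.astype("<U0") is the call being skipped: a no-op on pandas 2, but on
    # pandas 3 it would coerce the StringDtype column back to object.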
9 changes: 9 additions & 0 deletions tiledb/sparse_array.py
@@ -122,6 +122,15 @@ def _setitem_impl_sparse(self, selection, val, nullmaps: dict):
             attr_val = val[name]

         try:
+            # Capture null mask before np.asarray() loses pandas NA info
+            if (
+                attr.isvar
+                and attr.isnullable
+                and name not in nullmaps
+                and hasattr(attr_val, "isna")
+            ):
+                nullmaps[name] = (~attr_val.isna()).to_numpy(dtype=np.uint8)
+
             # ensure that the value is array-convertible, for example: pandas.Series
             attr_val = np.asarray(attr_val)

11 changes: 3 additions & 8 deletions tiledb/tests/common.py
@@ -26,17 +26,12 @@

 def has_pandas():
     try:
-        import pandas as pd
-    except ImportError:
-        return False
+        import pandas

-    if Version(pd.__version__) < Version("1.0") or Version(pd.__version__) >= Version(
-        "3.0.0.dev0"
-    ):
+        return True
+    except ImportError:
         return False

-    return True


 def has_pyarrow():
     try:
8 changes: 7 additions & 1 deletion tiledb/tests/datatypes.py
@@ -48,11 +48,17 @@ def __len__(self):
         return len(self._flat_arrays)

     def __getitem__(self, i):
-        return self._flat_arrays[i]
+        if isinstance(i, (int, np.integer)):
+            return self._flat_arrays[i]
+        return type(self)(self._flat_arrays[i], self._dtype)

     @property
     def dtype(self):
         return self._dtype

+    def copy(self):
+        return type(self)(self._flat_arrays, self._dtype)
+
     @property
     def ndim(self):
         return 1
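Note: the `__getitem__` change brings the test extension array in line with the pandas ExtensionArray indexing contract: a scalar position returns an element, while slices and masks return a new array of the same type (`copy()` belongs to the same interface). For reference, a built-in extension array shows the expected behavior:

    import numpy as np
    import pandas as pd

    arr = pd.array([1, 2, 3], dtype="Int64")
    print(arr[0])                               # scalar element: 1
    print(arr[1:])                              # IntegerArray([2, 3]), same type rather than a list
    print(arr[np.array([True, False, True])])   # boolean mask -> IntegerArray([1, 3])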
4 changes: 2 additions & 2 deletions tiledb/tests/test_enumeration.py
@@ -111,7 +111,7 @@ def test_array_schema_enumeration(self):

     @pytest.mark.skipif(
         not has_pyarrow() or not has_pandas(),
-        reason="pyarrow>=1.0 and/or pandas>=1.0,<3.0 not installed",
+        reason="pyarrow>=1.0 and/or pandas not installed",
     )
     @pytest.mark.parametrize("sparse", [True, False])
     @pytest.mark.parametrize("pass_df", [True, False])
@@ -185,7 +185,7 @@ def test_enum_dtypes(self, dtype, values):
         assert enmr.dtype == enmr.values().dtype == dtype
         assert_array_equal(enmr.values(), values)

-    @pytest.mark.skipif(not has_pandas(), reason="pandas>=1.0,<3.0 not installed")
+    @pytest.mark.skipif(not has_pandas(), reason="pandas not installed")
     def test_from_pandas_dtype_mismatch(self):
         import pandas as pd

7 changes: 3 additions & 4 deletions tiledb/tests/test_examples.py
@@ -43,7 +43,7 @@ def test_examples(self, path):
             ]
         ]
         if not has_pandas() and path in requires_pd:
-            pytest.mark.skip("pandas>=1.0,<3.0 not installed")
+            pytest.mark.skip("pandas not installed")
         else:
             with tempfile.TemporaryDirectory() as tmpdir:
                 try:
@@ -73,10 +73,9 @@ def test_docs(self, capsys):
         if failures:
             stderr = capsys.readouterr().out
             if "No module named 'pandas'" in stderr or (
-                "Pandas version >= 1.0 and < 3.0 required for dataframe functionality"
-                in stderr
+                "Pandas is required for dataframe functionality" in stderr
                 and not has_pandas()
             ):
-                pytest.skip("pandas>=1.0,<3.0 not installed")
+                pytest.skip("pandas not installed")
             else:
                 pytest.fail(stderr)
6 changes: 3 additions & 3 deletions tiledb/tests/test_fixes.py
@@ -91,7 +91,7 @@ def test_ch8292(self):
             buffers = list(*q._get_buffers().values())
             assert buffers[0].nbytes == max_val

-    @pytest.mark.skipif(not has_pandas(), reason="pandas>=1.0,<3.0 not installed")
+    @pytest.mark.skipif(not has_pandas(), reason="pandas not installed")
     def test_ch10282_concurrent_multi_index(self):
         """Test concurrent access to a single tiledb.Array using
         Array.multi_index and Array.df. We pass an array and slice
@@ -230,7 +230,7 @@ def test_fix_stats_error_messages(self):

     @pytest.mark.skipif(
         not has_pandas() and has_pyarrow(),
-        reason="pandas>=1.0,<3.0 or pyarrow>=1.0 not installed",
+        reason="pandas or pyarrow>=1.0 not installed",
     )
     def test_py1078_df_all_empty_strings(self):
         uri = self.path()
@@ -246,7 +246,7 @@ def test_py1078_df_all_empty_strings(self):
         with tiledb.open(uri) as arr:
             tm.assert_frame_equal(arr.df[:], df)

-    @pytest.mark.skipif(not has_pandas(), reason="pandas>=1.0,<3.0 not installed")
+    @pytest.mark.skipif(not has_pandas(), reason="pandas not installed")
     @pytest.mark.parametrize("is_sparse", [True, False])
     def test_sc1430_nonexisting_timestamp(self, is_sparse):
         path = self.path("nonexisting_timestamp")
2 changes: 1 addition & 1 deletion tiledb/tests/test_hypothesis.py
@@ -13,7 +13,7 @@
 tm = pd._testing


-@pytest.mark.skipif(not has_pandas(), reason="pandas>=1.0,<3.0 not installed")
+@pytest.mark.skipif(not has_pandas(), reason="pandas not installed")
 @pytest.mark.parametrize("mode", ["np", "df"])
 @hp.settings(deadline=None, verbosity=hp.Verbosity.verbose)
 @hp.given(st.binary())
8 changes: 4 additions & 4 deletions tiledb/tests/test_libtiledb.py
@@ -418,7 +418,7 @@ def test_array_delete(self):

     @pytest.mark.skipif(
         not has_pyarrow() or not has_pandas(),
-        reason="pyarrow>=1.0 and/or pandas>=1.0,<3.0 not installed",
+        reason="pyarrow>=1.0 and/or pandas not installed",
     )
     @pytest.mark.parametrize("sparse", [True, False])
     @pytest.mark.parametrize("pass_df", [True, False])
@@ -1784,7 +1784,7 @@ def test_query_real_multi_index(self, fx_sparse_cell_order):
                 "coords" not in T.query(coords=False).multi_index[-10.0:5.0]
             )

-    @pytest.mark.skipif(not has_pandas(), reason="pandas>=1.0,<3.0 not installed")
+    @pytest.mark.skipif(not has_pandas(), reason="pandas not installed")
     @pytest.mark.parametrize("dtype", ["u1", "u2", "u4", "u8", "i1", "i2", "i4", "i8"])
     def test_sparse_index_dtypes(self, dtype):
         path = self.path()
@@ -1805,7 +1805,7 @@ def test_sparse_index_dtypes(self, dtype):
         assert B[data[1]]["attr"] == data[1]
         assert B.multi_index[data[0]]["attr"] == data[0]

-    @pytest.mark.skipif(not has_pandas(), reason="pandas>=1.0,<3.0 not installed")
+    @pytest.mark.skipif(not has_pandas(), reason="pandas not installed")
     @pytest.mark.skipif(
         tiledb.libtiledb.version() < (2, 10),
         reason="TILEDB_BOOL introduced in libtiledb 2.10",
@@ -3743,7 +3743,7 @@ def test_query_return_incomplete_error(self, sparse):
         with self.assertRaises(tiledb.TileDBError):
             A.query(return_incomplete=True)[:]

-    @pytest.mark.skipif(not has_pandas(), reason="pandas>=1.0,<3.0 not installed")
+    @pytest.mark.skipif(not has_pandas(), reason="pandas not installed")
     @pytest.mark.parametrize(
         "use_arrow, return_arrow, indexer",
         [