Reject the PyArrow null type when converting schemas for Iceberg format versions below 3.

The conversion of pa.null() now raises ValueError (naming the field path and
the format version) when the format version is < 3; otherwise it still maps
to UnknownType. A catalog test checks the error message contents.

diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index 1077f41f6a..f74a116e6e 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -1435,6 +1435,13 @@ def primitive(self, primitive: pa.DataType) -> PrimitiveType:
         elif pa.types.is_null(primitive):
             # PyArrow null type (pa.null()) is converted to Iceberg UnknownType
             # UnknownType can be promoted to any primitive type in V3+ tables per the Iceberg spec
+            if self._format_version < 3:
+                field_path = ".".join(self._field_names) if self._field_names else ""
+                raise ValueError(
+                    "Null type (pa.null()) is not supported in Iceberg format version "
+                    f"{self._format_version}. Field: {field_path}. "
+                    "Use a concrete type (string, int, boolean, etc.) or set format-version=3."
+                )
             return UnknownType()
         elif isinstance(primitive, pa.UuidType):
             return UUIDType()
diff --git a/tests/catalog/test_base.py b/tests/catalog/test_base.py
index 42702c8c2b..91f13b426e 100644
--- a/tests/catalog/test_base.py
+++ b/tests/catalog/test_base.py
@@ -207,6 +207,16 @@ def test_convert_schema_if_needed(
     assert expected == catalog._convert_schema_if_needed(schema)
 
 
+def test_convert_schema_if_needed_rejects_null_type(catalog: InMemoryCatalog) -> None:
+    schema = pa.schema([pa.field("n1", pa.null())])
+    with pytest.raises(ValueError) as exc_info:
+        catalog._convert_schema_if_needed(schema)
+    message = str(exc_info.value)
+    assert "Null type" in message
+    assert "n1" in message
+    assert "format-version=3" in message
+
+
 def test_create_table_pyarrow_schema(catalog: InMemoryCatalog, pyarrow_schema_simple_without_ids: pa.Schema) -> None:
     catalog.create_namespace(TEST_TABLE_NAMESPACE)
     table = catalog.create_table(