diff --git a/metadata_columns.go b/metadata_columns.go new file mode 100644 index 000000000..27bb0a8a2 --- /dev/null +++ b/metadata_columns.go @@ -0,0 +1,32 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package iceberg + +// Reserved metadata column field IDs per the Iceberg spec. +// These IDs must not be used by user-defined schema fields. +const ( + // RowIDFieldID is the reserved field ID for the _row_id metadata column (v3+); the value equals math.MaxInt32 - 107. + RowIDFieldID = 2147483540 + // LastUpdatedSequenceNumberFieldID is the reserved field ID for the _last_updated_sequence_number metadata column (v3+); the value equals math.MaxInt32 - 108. + LastUpdatedSequenceNumberFieldID = 2147483539 +) + +// IsMetadataColumnID reports whether fieldID is exactly RowIDFieldID or LastUpdatedSequenceNumberFieldID, the reserved metadata column IDs declared in this file. 
+func IsMetadataColumnID(fieldID int) bool { + return fieldID == RowIDFieldID || fieldID == LastUpdatedSequenceNumberFieldID +} diff --git a/table/metadata_builder_internal_test.go b/table/metadata_builder_internal_test.go index 79cca000f..fc66936d0 100644 --- a/table/metadata_builder_internal_test.go +++ b/table/metadata_builder_internal_test.go @@ -1688,6 +1688,22 @@ func TestUnknownTypeValidation(t *testing.T) { require.ErrorContains(t, err, "must be optional") }) + t.Run("ReservedFieldIDRowID", func(t *testing.T) { + schema := iceberg.NewSchema(1, + iceberg.NestedField{ID: iceberg.RowIDFieldID, Name: "bad_field", Type: iceberg.PrimitiveTypes.Int64}, + ) + err := checkSchemaCompatibility(schema, 3) + require.Error(t, err) + require.ErrorContains(t, err, "reserved metadata column ID") + }) + t.Run("ReservedFieldIDLastUpdatedSeqNum", func(t *testing.T) { + schema := iceberg.NewSchema(1, + iceberg.NestedField{ID: iceberg.LastUpdatedSequenceNumberFieldID, Name: "bad_field", Type: iceberg.PrimitiveTypes.Int64}, + ) + err := checkSchemaCompatibility(schema, 3) + require.Error(t, err) + require.ErrorContains(t, err, "reserved metadata column ID") + }) t.Run("InvalidUnknownMapValue", func(t *testing.T) { invalidSchema := iceberg.NewSchema(1, iceberg.NestedField{ID: 2, Name: "invalid_map", Type: &iceberg.MapType{KeyID: 3, KeyType: iceberg.StringType{}, ValueID: 4, ValueType: iceberg.UnknownType{}, ValueRequired: true}, Required: false}, diff --git a/table/metadata_schema_compatibility.go b/table/metadata_schema_compatibility.go index 1d3f76ee9..fdccc7fdb 100644 --- a/table/metadata_schema_compatibility.go +++ b/table/metadata_schema_compatibility.go @@ -95,6 +95,11 @@ func checkSchemaCompatibility(sc *iceberg.Schema, formatVersion int) error { panic("invalid schema: field with id " + strconv.Itoa(field.ID) + " not found, this is a bug, please report.") } + if iceberg.IsMetadataColumnID(field.ID) { + return fmt.Errorf("%w: field '%s' uses reserved metadata column ID %d", 
+ iceberg.ErrInvalidSchema, colName, field.ID) + } + minFormatVersion := minFormatVersionForType(field.Type) if formatVersion < minFormatVersion { problems = append(problems, IncompatibleField{