diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiConversionSource.java b/xtable-core/src/main/java/org/apache/xtable/hudi/HudiConversionSource.java index 00faa97d3..23c66078d 100644 --- a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiConversionSource.java +++ b/xtable-core/src/main/java/org/apache/xtable/hudi/HudiConversionSource.java @@ -59,10 +59,12 @@ public class HudiConversionSource implements ConversionSource { public HudiConversionSource( HoodieTableMetaClient metaClient, - PathBasedPartitionSpecExtractor sourcePartitionSpecExtractor) { + PathBasedPartitionSpecExtractor sourcePartitionSpecExtractor, + boolean omitMetadataFields) { this.metaClient = metaClient; this.tableExtractor = - new HudiTableExtractor(new HudiSchemaExtractor(), sourcePartitionSpecExtractor); + new HudiTableExtractor( + new HudiSchemaExtractor(), sourcePartitionSpecExtractor, omitMetadataFields); this.dataFileExtractor = new HudiDataFileExtractor( metaClient, diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiConversionSourceProvider.java b/xtable-core/src/main/java/org/apache/xtable/hudi/HudiConversionSourceProvider.java index aad7e0a16..55abc267c 100644 --- a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiConversionSourceProvider.java +++ b/xtable-core/src/main/java/org/apache/xtable/hudi/HudiConversionSourceProvider.java @@ -46,7 +46,9 @@ public HudiConversionSource getConversionSourceInstance(SourceTable sourceTable) final PathBasedPartitionSpecExtractor sourcePartitionSpecExtractor = HudiSourceConfig.fromProperties(sourceTable.getAdditionalProperties()) .loadSourcePartitionSpecExtractor(); + boolean omitMetadataFields = + HudiSourceConfig.getOmitMetadataFields(sourceTable.getAdditionalProperties(), hadoopConf); - return new HudiConversionSource(metaClient, sourcePartitionSpecExtractor); + return new HudiConversionSource(metaClient, sourcePartitionSpecExtractor, omitMetadataFields); } } diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiSourceConfig.java b/xtable-core/src/main/java/org/apache/xtable/hudi/HudiSourceConfig.java index 7732c3820..a4eb672af 100644 --- a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiSourceConfig.java +++ b/xtable-core/src/main/java/org/apache/xtable/hudi/HudiSourceConfig.java @@ -26,6 +26,8 @@ import lombok.Value; +import org.apache.hadoop.conf.Configuration; + import com.google.common.base.Preconditions; import org.apache.xtable.model.schema.PartitionFieldSpec; @@ -39,6 +41,10 @@ public class HudiSourceConfig { "xtable.hudi.source.partition_spec_extractor_class"; public static final String PARTITION_FIELD_SPEC_CONFIG = "xtable.hudi.source.partition_field_spec_config"; + public static final String OMIT_METADATA_FIELDS_CONFIG = + "xtable.hudi.source.omit_metadata_fields"; + public static final String HUDI_OMIT_METADATA_FIELDS_CONFIG = + "hoodie.datasource.hive_sync.omit_metadata_fields"; String partitionSpecExtractorClass; List partitionFieldSpecs; @@ -84,4 +90,30 @@ public PathBasedPartitionSpecExtractor loadSourcePartitionSpecExtractor() { return ReflectionUtils.createInstanceOfClass( partitionSpecExtractorClass, this.getPartitionFieldSpecs()); } + + public static boolean getOmitMetadataFields(Properties properties, Configuration configuration) { + String propertyValue = + getPropertyOrNull( + properties, OMIT_METADATA_FIELDS_CONFIG, HUDI_OMIT_METADATA_FIELDS_CONFIG); + if (propertyValue != null) { + return Boolean.parseBoolean(propertyValue); + } + if (configuration == null) { + return false; + } + String configValue = configuration.get(OMIT_METADATA_FIELDS_CONFIG); + if (configValue == null) { + configValue = configuration.get(HUDI_OMIT_METADATA_FIELDS_CONFIG); + } + return configValue != null && Boolean.parseBoolean(configValue); + } + + private static String getPropertyOrNull( + Properties properties, String primaryKey, String fallbackKey) { + if (properties == null) { + return null; + } + String value = properties.getProperty(primaryKey); + return value != null ? value : properties.getProperty(fallbackKey); + } } diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiTableExtractor.java b/xtable-core/src/main/java/org/apache/xtable/hudi/HudiTableExtractor.java index 795f651ce..fda484a96 100644 --- a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiTableExtractor.java +++ b/xtable-core/src/main/java/org/apache/xtable/hudi/HudiTableExtractor.java @@ -21,12 +21,14 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.Set; import java.util.stream.Collectors; import javax.inject.Singleton; import org.apache.avro.Schema; +import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.TableSchemaResolver; import org.apache.hudi.common.table.timeline.HoodieInstant; @@ -47,14 +49,20 @@ */ @Singleton public class HudiTableExtractor { + private static final Set HUDI_METADATA_FIELDS = + HoodieRecord.HOODIE_META_COLUMNS.stream().collect(Collectors.toSet()); + private final HudiSchemaExtractor schemaExtractor; private final SourcePartitionSpecExtractor partitionSpecExtractor; + private final boolean omitMetadataFields; public HudiTableExtractor( HudiSchemaExtractor schemaExtractor, - SourcePartitionSpecExtractor sourcePartitionSpecExtractor) { + SourcePartitionSpecExtractor sourcePartitionSpecExtractor, + boolean omitMetadataFields) { this.schemaExtractor = schemaExtractor; this.partitionSpecExtractor = sourcePartitionSpecExtractor; + this.omitMetadataFields = omitMetadataFields; } public InternalTable table(HoodieTableMetaClient metaClient, HoodieInstant commit) { @@ -70,6 +78,7 @@ public InternalTable table(HoodieTableMetaClient metaClient, HoodieInstant commi "Failed to convert table %s schema", metaClient.getTableConfig().getTableName()), e); } + canonicalSchema = omitMetadataFieldsIfEnabled(canonicalSchema, omitMetadataFields); List partitionFields = partitionSpecExtractor.spec(canonicalSchema); List recordKeyFields = getRecordKeyFields(metaClient, canonicalSchema); if (!recordKeyFields.isEmpty()) { @@ -91,6 +100,21 @@ public InternalTable table(HoodieTableMetaClient metaClient, HoodieInstant commi .build(); } + static InternalSchema omitMetadataFieldsIfEnabled( + InternalSchema schema, boolean omitMetadataFields) { + if (!omitMetadataFields || schema.getFields() == null || schema.getFields().isEmpty()) { + return schema; + } + List filteredFields = + schema.getFields().stream() + .filter(field -> !HUDI_METADATA_FIELDS.contains(field.getName())) + .collect(Collectors.toList()); + if (filteredFields.size() == schema.getFields().size()) { + return schema; + } + return schema.toBuilder().fields(filteredFields).build(); + } + private List getRecordKeyFields( HoodieTableMetaClient metaClient, InternalSchema canonicalSchema) { Option recordKeyFieldNames = metaClient.getTableConfig().getRecordKeyFields(); diff --git a/xtable-core/src/test/java/org/apache/xtable/hudi/ITHudiConversionSource.java b/xtable-core/src/test/java/org/apache/xtable/hudi/ITHudiConversionSource.java index b1cba5c7a..e1a963998 100644 --- a/xtable-core/src/test/java/org/apache/xtable/hudi/ITHudiConversionSource.java +++ b/xtable-core/src/test/java/org/apache/xtable/hudi/ITHudiConversionSource.java @@ -696,7 +696,7 @@ private HudiConversionSource getHudiSourceClient( PathBasedPartitionSpecExtractor partitionSpecExtractor = new ConfigurationBasedPartitionSpecExtractor( HudiSourceConfig.parsePartitionFieldSpecs(xTablePartitionConfig)); - return new HudiConversionSource(hoodieTableMetaClient, partitionSpecExtractor); + return new HudiConversionSource(hoodieTableMetaClient, partitionSpecExtractor, false); } private boolean checkIfNewFileGroupIsAdded(String activePath, TableChange tableChange) { diff --git a/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiSourceConfig.java b/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiSourceConfig.java new file mode 100644 index 000000000..9718d6b65 --- /dev/null +++ b/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiSourceConfig.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.xtable.hudi; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.Properties; + +import org.apache.hadoop.conf.Configuration; +import org.junit.jupiter.api.Test; + +public class TestHudiSourceConfig { + + @Test + void testOmitMetadataFieldsFromProperties() { + Properties properties = new Properties(); + properties.setProperty(HudiSourceConfig.OMIT_METADATA_FIELDS_CONFIG, "true"); + Configuration configuration = new Configuration(false); + + assertTrue(HudiSourceConfig.getOmitMetadataFields(properties, configuration)); + } + + @Test + void testOmitMetadataFieldsFromHudiAlias() { + Properties properties = new Properties(); + properties.setProperty(HudiSourceConfig.HUDI_OMIT_METADATA_FIELDS_CONFIG, "true"); + Configuration configuration = new Configuration(false); + + assertTrue(HudiSourceConfig.getOmitMetadataFields(properties, configuration)); + } + + @Test + void testOmitMetadataFieldsFromConfiguration() { + Properties properties = new Properties(); + Configuration configuration = new Configuration(false); + configuration.set(HudiSourceConfig.OMIT_METADATA_FIELDS_CONFIG, "true"); + + assertTrue(HudiSourceConfig.getOmitMetadataFields(properties, configuration)); + } + + @Test + void testOmitMetadataFieldsDefaultsToFalse() { + Properties properties = new Properties(); + Configuration configuration = new Configuration(false); + + assertFalse(HudiSourceConfig.getOmitMetadataFields(properties, configuration)); + } +} diff --git a/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiTableExtractor.java b/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiTableExtractor.java new file mode 100644 index 000000000..2d58da15b --- /dev/null +++ b/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiTableExtractor.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.xtable.hudi; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +import org.junit.jupiter.api.Test; + +import org.apache.xtable.model.schema.InternalField; +import org.apache.xtable.model.schema.InternalSchema; +import org.apache.xtable.model.schema.InternalType; + +public class TestHudiTableExtractor { + + @Test + void testOmitMetadataFieldsWhenEnabled() { + InternalSchema schema = + InternalSchema.builder() + .name("test") + .dataType(InternalType.RECORD) + .fields( + Arrays.asList( + field("_hoodie_commit_time"), + field("_hoodie_record_key"), + field("id"), + field("name"))) + .build(); + + InternalSchema filtered = HudiTableExtractor.omitMetadataFieldsIfEnabled(schema, true); + + List fieldNames = + filtered.getFields().stream().map(InternalField::getName).collect(Collectors.toList()); + assertEquals(Arrays.asList("id", "name"), fieldNames); + } + + @Test + void testRetainsFieldsWhenOmitDisabled() { + InternalSchema schema = + InternalSchema.builder() + .name("test") + .dataType(InternalType.RECORD) + .fields(Arrays.asList(field("_hoodie_commit_time"), field("id"))) + .build(); + + InternalSchema filtered = HudiTableExtractor.omitMetadataFieldsIfEnabled(schema, false); + + assertEquals(schema, filtered); + assertTrue(filtered.getFields().stream().anyMatch(f -> f.getName().startsWith("_hoodie_"))); + } + + private static InternalField field(String name) { + return InternalField.builder() + .name(name) + .schema(InternalSchema.builder().dataType(InternalType.STRING).build()) + .build(); + } +} diff --git a/xtable-hudi-support/xtable-hudi-support-extensions/src/main/java/org/apache/xtable/hudi/sync/XTableSyncConfig.java b/xtable-hudi-support/xtable-hudi-support-extensions/src/main/java/org/apache/xtable/hudi/sync/XTableSyncConfig.java index 4d878cea5..13f5ed0b7 100644 --- a/xtable-hudi-support/xtable-hudi-support-extensions/src/main/java/org/apache/xtable/hudi/sync/XTableSyncConfig.java +++ b/xtable-hudi-support/xtable-hudi-support-extensions/src/main/java/org/apache/xtable/hudi/sync/XTableSyncConfig.java @@ -36,6 +36,11 @@ public class XTableSyncConfig extends HoodieSyncConfig implements Serializable { .defaultValue(24 * 7) .withDocumentation("Retention in hours for metadata in target table."); + public static final ConfigProperty XTABLE_OMIT_METADATA_FIELDS = + ConfigProperty.key("hoodie.xtable.sync.omit_metadata_fields") + .defaultValue(false) + .withDocumentation("Whether to omit the hudi metadata fields in the target table."); + public XTableSyncConfig(Properties props) { super(props); }