apache · cxzl25 · Dec 24, 2025 · Dec 24, 2025 · Dec 24, 2025 · Dec 24, 2025
diff --git a/.github/workflows/utitcase-spark-4.x.yml b/.github/workflows/utitcase-spark-4.x.yml
@@ -61,10 +61,10 @@ jobs:
           jvm_timezone=$(random_timezone)
           echo "JVM timezone is set to $jvm_timezone"
           test_modules=""
-          for suffix in ut 4.0; do
+          for suffix in ut 4.1 4.0; do
           test_modules+="org.apache.paimon:paimon-spark-${suffix}_2.13,"
           done
           test_modules="${test_modules%,}"
           mvn -T 2C -B verify -pl "${test_modules}" -Duser.timezone=$jvm_timezone -Pspark4,flink1
         env:
-          MAVEN_OPTS: -Xmx4096m
+          MAVEN_OPTS: -Xmx4096m
diff --git a/...-spark/paimon-spark-4.0/src/main/scala/org/apache/paimon/spark/data/Spark4ArrayData.scala b/...-spark/paimon-spark-4.0/src/main/scala/org/apache/paimon/spark/data/Spark4ArrayData.scala
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.spark.data
+
+import org.apache.paimon.types.DataType
+
+import org.apache.spark.unsafe.types.VariantVal
+
+class Spark4ArrayData(override val elementType: DataType) extends AbstractSparkArrayData {
+
+  override def getVariant(ordinal: Int): VariantVal = {
+    val v = paimonArray.getVariant(ordinal)
+    new VariantVal(v.value(), v.metadata())
+  }
+}
diff --git a/...park/paimon-spark-4.0/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRow.scala b/...park/paimon-spark-4.0/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRow.scala
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.spark.data
+
+import org.apache.paimon.spark.AbstractSparkInternalRow
+import org.apache.paimon.types.RowType
+
+import org.apache.spark.unsafe.types.VariantVal
+
+class Spark4InternalRow(rowType: RowType) extends AbstractSparkInternalRow(rowType) {
+
+  override def getVariant(i: Int): VariantVal = {
+    val v = row.getVariant(i)
+    new VariantVal(v.value(), v.metadata())
+  }
+}
diff --git a/...mon-spark-4.0/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRowWithBlob.scala b/...mon-spark-4.0/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRowWithBlob.scala
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.spark.data
+
+import org.apache.paimon.types.RowType
+import org.apache.paimon.utils.InternalRowUtils.copyInternalRow
+
+import org.apache.spark.sql.catalyst.InternalRow
+
+class Spark4InternalRowWithBlob(rowType: RowType, blobFieldIndex: Int, blobAsDescriptor: Boolean)
+  extends Spark4InternalRow(rowType) {
+
+  override def getBinary(ordinal: Int): Array[Byte] = {
+    if (ordinal == blobFieldIndex) {
+      if (blobAsDescriptor) {
+        row.getBlob(ordinal).toDescriptor.serialize()
+      } else {
+        row.getBlob(ordinal).toData
+      }
+    } else {
+      super.getBinary(ordinal)
+    }
+  }
+
+  override def copy: InternalRow =
+    SparkInternalRow.create(rowType, blobAsDescriptor).replace(copyInternalRow(row, rowType))
+}
diff --git a/.../paimon-spark-4.0/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala b/.../paimon-spark-4.0/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.paimon.shims
+
+import org.apache.paimon.spark.data.{Spark4ArrayData, Spark4InternalRow, Spark4InternalRowWithBlob, SparkArrayData, SparkInternalRow}
+import org.apache.paimon.types.{DataType, RowType}
+
+import org.apache.hadoop.fs.Path
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.expressions.Expression
+import org.apache.spark.sql.catalyst.plans.logical.MergeRows
+import org.apache.spark.sql.catalyst.plans.logical.MergeRows.Instruction
+import org.apache.spark.sql.execution.datasources._
+import org.apache.spark.sql.execution.streaming.{FileStreamSink, MetadataLogFileIndex}
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.util.CaseInsensitiveStringMap
+
+import scala.collection.JavaConverters._
+
+object MinorVersionShim {
+
+  def createKeep(context: String, condition: Expression, output: Seq[Expression]): Instruction = {
+    MergeRows.Keep(condition, output)
+  }
+
+  def createSparkInternalRow(rowType: RowType): SparkInternalRow = {
+    new Spark4InternalRow(rowType)
+  }
+
+  def createSparkInternalRowWithBlob(
+      rowType: RowType,
+      blobFieldIndex: Int,
+      blobAsDescriptor: Boolean): SparkInternalRow = {
+    new Spark4InternalRowWithBlob(rowType, blobFieldIndex, blobAsDescriptor)
+  }
+
+  def createSparkArrayData(elementType: DataType): SparkArrayData = {
+    new Spark4ArrayData(elementType)
+  }
+
+  def createFileIndex(
+      options: CaseInsensitiveStringMap,
+      sparkSession: SparkSession,
+      paths: Seq[String],
+      userSpecifiedSchema: Option[StructType],
+      partitionSchema: StructType): PartitioningAwareFileIndex = {
+
+    class PartitionedMetadataLogFileIndex(
+        sparkSession: SparkSession,
+        path: Path,
+        parameters: Map[String, String],
+        userSpecifiedSchema: Option[StructType],
+        override val partitionSchema: StructType)
+      extends MetadataLogFileIndex(sparkSession, path, parameters, userSpecifiedSchema)
+
+    class PartitionedInMemoryFileIndex(
+        sparkSession: SparkSession,
+        rootPathsSpecified: Seq[Path],
+        parameters: Map[String, String],
+        userSpecifiedSchema: Option[StructType],
+        fileStatusCache: FileStatusCache = NoopCache,
+        userSpecifiedPartitionSpec: Option[PartitionSpec] = None,
+        metadataOpsTimeNs: Option[Long] = None,
+        override val partitionSchema: StructType)
+      extends InMemoryFileIndex(
+        sparkSession,
+        rootPathsSpecified,
+        parameters,
+        userSpecifiedSchema,
+        fileStatusCache,
+        userSpecifiedPartitionSpec,
+        metadataOpsTimeNs)
+
+    def globPaths: Boolean = {
+      val entry = options.get(DataSource.GLOB_PATHS_KEY)
+      Option(entry).forall(_ == "true")
+    }
+
+    val caseSensitiveMap = options.asCaseSensitiveMap.asScala.toMap
+    val hadoopConf = sparkSession.sessionState.newHadoopConfWithOptions(caseSensitiveMap)
+    if (FileStreamSink.hasMetadata(paths, hadoopConf, sparkSession.sessionState.conf)) {
+      new PartitionedMetadataLogFileIndex(
+        sparkSession,
+        new Path(paths.head),
+        options.asScala.toMap,
+        userSpecifiedSchema,
+        partitionSchema = partitionSchema)
+    } else {
+      val rootPathsSpecified = DataSource.checkAndGlobPathIfNecessary(
+        paths,
+        hadoopConf,
+        checkEmptyGlobPath = true,
+        checkFilesExist = true,
+        enableGlobbing = globPaths)
+      val fileStatusCache = FileStatusCache.getOrCreate(sparkSession)
+
+      new PartitionedInMemoryFileIndex(
+        sparkSession,
+        rootPathsSpecified,
+        caseSensitiveMap,
+        userSpecifiedSchema,
+        fileStatusCache,
+        partitionSchema = partitionSchema)
+    }
+  }
+
+}