From ae2b8a64905cc72614de34d1e10e30ca1e1b63c4 Mon Sep 17 00:00:00 2001 From: sychen Date: Wed, 24 Dec 2025 12:11:58 +0800 Subject: [PATCH 01/35] support spark 4.1.0 --- paimon-spark/paimon-spark-4.1/pom.xml | 156 ++++++++++++++++++ .../MergePaimonScalarSubqueries.scala | 93 +++++++++++ .../resources/function/hive-test-udfs.jar | Bin 0 -> 35660 bytes .../src/test/resources/hive-site.xml | 56 +++++++ .../src/test/resources/log4j2-test.properties | 38 +++++ .../procedure/CompactProcedureTest.scala | 21 +++ .../spark/procedure/ProcedureTest.scala | 21 +++ .../paimon/spark/sql/AnalyzeTableTest.scala | 21 +++ .../org/apache/paimon/spark/sql/DDLTest.scala | 21 +++ .../spark/sql/DDLWithHiveCatalogTest.scala | 23 +++ .../paimon/spark/sql/DataFrameWriteTest.scala | 21 +++ .../spark/sql/DeleteFromTableTest.scala | 29 ++++ .../paimon/spark/sql/DescribeTableTest.scala | 21 +++ .../paimon/spark/sql/FormatTableTest.scala | 21 +++ .../spark/sql/InsertOverwriteTableTest.scala | 21 +++ .../paimon/spark/sql/MergeIntoTableTest.scala | 45 +++++ .../sql/PaimonCompositePartitionKeyTest.scala | 21 +++ .../spark/sql/PaimonOptimizationTest.scala | 39 +++++ .../paimon/spark/sql/PaimonPushDownTest.scala | 21 +++ .../spark/sql/PaimonV1FunctionTest.scala | 21 +++ .../paimon/spark/sql/PaimonViewTest.scala | 21 +++ .../spark/sql/RewriteUpsertTableTest.scala | 21 +++ .../paimon/spark/sql/RowTrackingTest.scala | 21 +++ .../paimon/spark/sql/ShowColumnsTest.scala | 21 +++ .../sql/SparkV2FilterConverterTest.scala | 21 +++ .../apache/paimon/spark/sql/TagDdlTest.scala | 21 +++ .../paimon/spark/sql/UpdateTableTest.scala | 21 +++ .../apache/paimon/spark/sql/VariantTest.scala | 21 +++ pom.xml | 1 + 29 files changed, 879 insertions(+) create mode 100644 paimon-spark/paimon-spark-4.1/pom.xml create mode 100644 paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/paimon/spark/catalyst/optimizer/MergePaimonScalarSubqueries.scala create mode 100644 paimon-spark/paimon-spark-4.1/src/test/resources/function/hive-test-udfs.jar create mode 100644 paimon-spark/paimon-spark-4.1/src/test/resources/hive-site.xml create mode 100644 paimon-spark/paimon-spark-4.1/src/test/resources/log4j2-test.properties create mode 100644 paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTest.scala create mode 100644 paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/ProcedureTest.scala create mode 100644 paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/AnalyzeTableTest.scala create mode 100644 paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DDLTest.scala create mode 100644 paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DDLWithHiveCatalogTest.scala create mode 100644 paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DataFrameWriteTest.scala create mode 100644 paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DeleteFromTableTest.scala create mode 100644 paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DescribeTableTest.scala create mode 100644 paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/FormatTableTest.scala create mode 100644 paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/InsertOverwriteTableTest.scala create mode 100644 paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/MergeIntoTableTest.scala create mode 100644 
paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonCompositePartitionKeyTest.scala
 create mode 100644 paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonOptimizationTest.scala
 create mode 100644 paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonPushDownTest.scala
 create mode 100644 paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonV1FunctionTest.scala
 create mode 100644 paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonViewTest.scala
 create mode 100644 paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/RewriteUpsertTableTest.scala
 create mode 100644 paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/RowTrackingTest.scala
 create mode 100644 paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/ShowColumnsTest.scala
 create mode 100644 paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/SparkV2FilterConverterTest.scala
 create mode 100644 paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/TagDdlTest.scala
 create mode 100644 paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/UpdateTableTest.scala
 create mode 100644 paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/VariantTest.scala

diff --git a/paimon-spark/paimon-spark-4.1/pom.xml b/paimon-spark/paimon-spark-4.1/pom.xml
new file mode 100644
index 000000000000..21e7143463cd
--- /dev/null
+++ b/paimon-spark/paimon-spark-4.1/pom.xml
@@ -0,0 +1,156 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <groupId>org.apache.paimon</groupId>
+        <artifactId>paimon-spark</artifactId>
+        <version>1.4-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>paimon-spark-4.1_2.13</artifactId>
+    <name>Paimon : Spark : 4.1 : 2.13</name>
+
+    <properties>
+        <spark.version>4.1.0</spark.version>
+    </properties>
+
+    <dependencies>
+
+        <dependency>
+            <groupId>org.apache.paimon</groupId>
+            <artifactId>paimon-format</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.paimon</groupId>
+            <artifactId>paimon-spark4-common_${scala.binary.version}</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.paimon</groupId>
+            <artifactId>paimon-spark-common_${scala.binary.version}</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-sql_${scala.binary.version}</artifactId>
+            <version>${spark.version}</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-core_${scala.binary.version}</artifactId>
+            <version>${spark.version}</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
+            <version>${spark.version}</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-hive_${scala.binary.version}</artifactId>
+            <version>${spark.version}</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.paimon</groupId>
+            <artifactId>paimon-spark-ut_${scala.binary.version}</artifactId>
+            <version>${project.version}</version>
+            <classifier>tests</classifier>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-sql_${scala.binary.version}</artifactId>
+            <version>${spark.version}</version>
+            <classifier>tests</classifier>
+            <scope>test</scope>
+            <exclusions>
+                <exclusion>
+                    <groupId>org.apache.spark</groupId>
+                    <artifactId>spark-connect-shims_${scala.binary.version}</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
+            <version>${spark.version}</version>
+            <classifier>tests</classifier>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-core_${scala.binary.version}</artifactId>
+            <version>${spark.version}</version>
+            <classifier>tests</classifier>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-shade-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <id>shade-paimon</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>shade</goal>
+                        </goals>
+                        <configuration>
+                            <filters>
+                                <filter>
+                                    <artifact>*</artifact>
+                                    <excludes>
+                                        <exclude>com/github/luben/zstd/**</exclude>
+                                        <exclude>**/*libzstd-jni-*.so</exclude>
+                                        <exclude>**/*libzstd-jni-*.dll</exclude>
+                                    </excludes>
+                                </filter>
+                            </filters>
+                            <artifactSet>
+                                <includes>
+                                    <include>org.apache.paimon:paimon-spark4-common_${scala.binary.version}</include>
+                                </includes>
+                            </artifactSet>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>

diff --git a/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/paimon/spark/catalyst/optimizer/MergePaimonScalarSubqueries.scala b/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/paimon/spark/catalyst/optimizer/MergePaimonScalarSubqueries.scala
new file mode 100644
index 000000000000..88386e2bfebe
--- /dev/null
+++ b/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/paimon/spark/catalyst/optimizer/MergePaimonScalarSubqueries.scala
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.spark.catalyst.optimizer
+
+import org.apache.paimon.spark.PaimonScan
+
+import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference, ExprId, ScalarSubquery, SortOrder}
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation
+
+object MergePaimonScalarSubqueries extends MergePaimonScalarSubqueriesBase {
+
+  override def tryMergeDataSourceV2ScanRelation(
+      newV2ScanRelation: DataSourceV2ScanRelation,
+      cachedV2ScanRelation: DataSourceV2ScanRelation)
+      : Option[(LogicalPlan, AttributeMap[Attribute])] = {
+    (newV2ScanRelation, cachedV2ScanRelation) match {
+      case (
+            DataSourceV2ScanRelation(
+              newRelation,
+              newScan: PaimonScan,
+              newOutput,
+              newPartitioning,
+              newOrdering),
+            DataSourceV2ScanRelation(
+              cachedRelation,
+              cachedScan: PaimonScan,
+              _,
+              cachedPartitioning,
+              cacheOrdering)) =>
+        checkIdenticalPlans(newRelation, cachedRelation).flatMap {
+          outputMap =>
+            if (
+              samePartitioning(newPartitioning, cachedPartitioning, outputMap) && sameOrdering(
+                newOrdering,
+                cacheOrdering,
+                outputMap)
+            ) {
+              mergePaimonScan(newScan, cachedScan).map {
+                mergedScan =>
+                  val mergedAttributes = mergedScan
+                    .readSchema()
+                    .map(f => AttributeReference(f.name, f.dataType, f.nullable, f.metadata)())
+                  val cachedOutputNameMap = cachedRelation.output.map(a => a.name -> a).toMap
+                  val mergedOutput =
+                    mergedAttributes.map(a => cachedOutputNameMap.getOrElse(a.name, a))
+                  val newV2ScanRelation =
+                    cachedV2ScanRelation.copy(scan = mergedScan, output = mergedOutput)
+
+                  val mergedOutputNameMap = mergedOutput.map(a => a.name -> a).toMap
+                  val newOutputMap =
+                    AttributeMap(newOutput.map(a => a -> mergedOutputNameMap(a.name).toAttribute))
+
+                  newV2ScanRelation -> newOutputMap
+              }
+            } else {
+              None
+            }
+        }
+
+      case _ => None
+    }
+  }
+
+  private def sameOrdering(
+      newOrdering: Option[Seq[SortOrder]],
+      cachedOrdering: Option[Seq[SortOrder]],
+      outputAttrMap: AttributeMap[Attribute]): Boolean = {
+    val mappedNewOrdering = newOrdering.map(_.map(mapAttributes(_, outputAttrMap)))
+    mappedNewOrdering.map(_.map(_.canonicalized)) == cachedOrdering.map(_.map(_.canonicalized))
+  }
+
+  override protected def createScalarSubquery(plan: LogicalPlan, exprId: ExprId): ScalarSubquery = {
+    ScalarSubquery(plan, exprId = exprId)
+  }
+}

diff --git a/paimon-spark/paimon-spark-4.1/src/test/resources/function/hive-test-udfs.jar b/paimon-spark/paimon-spark-4.1/src/test/resources/function/hive-test-udfs.jar
new file mode 100644
index 0000000000000000000000000000000000000000..a5bfa456f6686da0ca010061cc3d690bfcd2ac96
GIT binary patch
literal 35660
[base85-encoded binary payload omitted: contents of the 35660-byte hive-test-udfs.jar test resource]

literal 0
HcmV?d00001

diff --git a/paimon-spark/paimon-spark-4.1/src/test/resources/hive-site.xml b/paimon-spark/paimon-spark-4.1/src/test/resources/hive-site.xml
new file mode 100644
index 000000000000..bdf2bb090760
--- /dev/null
+++ b/paimon-spark/paimon-spark-4.1/src/test/resources/hive-site.xml
@@ -0,0 +1,56 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<configuration>
+
+    <property>
+        <name>hive.metastore.integral.jdo.pushdown</name>
+        <value>true</value>
+    </property>
+
+    <property>
+        <name>hive.metastore.schema.verification</name>
+        <value>false</value>
+    </property>
+
+    <property>
+        <name>hive.metastore.client.capability.check</name>
+        <value>false</value>
+    </property>
+
+    <property>
+        <name>datanucleus.schema.autoCreateTables</name>
+        <value>true</value>
+    </property>
+
+    <property>
+        <name>datanucleus.schema.autoCreateAll</name>
+        <value>true</value>
+    </property>
+
+    <property>
+        <name>datanucleus.connectionPoolingType</name>
+        <value>DBCP</value>
+    </property>
+
+    <property>
+        <name>hive.metastore.uris</name>
+        <value>thrift://localhost:9090</value>
+        <description>Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore.</description>
+    </property>
+</configuration>
+ + \ No newline at end of file diff --git a/paimon-spark/paimon-spark-4.1/src/test/resources/log4j2-test.properties b/paimon-spark/paimon-spark-4.1/src/test/resources/log4j2-test.properties new file mode 100644 index 000000000000..6f324f5863ac --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/resources/log4j2-test.properties @@ -0,0 +1,38 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +# Set root logger level to OFF to not flood build logs +# set manually to INFO for debugging purposes +rootLogger.level = OFF +rootLogger.appenderRef.test.ref = TestLogger + +appender.testlogger.name = TestLogger +appender.testlogger.type = CONSOLE +appender.testlogger.target = SYSTEM_ERR +appender.testlogger.layout.type = PatternLayout +appender.testlogger.layout.pattern = %-4r [%tid %t] %-5p %c %x - %m%n + +logger.kafka.name = kafka +logger.kafka.level = OFF +logger.kafka2.name = state.change +logger.kafka2.level = OFF + +logger.zookeeper.name = org.apache.zookeeper +logger.zookeeper.level = OFF +logger.I0Itec.name = org.I0Itec +logger.I0Itec.level = OFF diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTest.scala new file mode 100644 index 000000000000..322d50a62127 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.spark.procedure + +class CompactProcedureTest extends CompactProcedureTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/ProcedureTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/ProcedureTest.scala new file mode 100644 index 000000000000..d57846709877 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/ProcedureTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.procedure + +class ProcedureTest extends ProcedureTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/AnalyzeTableTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/AnalyzeTableTest.scala new file mode 100644 index 000000000000..255906d04bf2 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/AnalyzeTableTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +class AnalyzeTableTest extends AnalyzeTableTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DDLTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DDLTest.scala new file mode 100644 index 000000000000..b729f57b33e7 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DDLTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +class DDLTest extends DDLTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DDLWithHiveCatalogTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DDLWithHiveCatalogTest.scala new file mode 100644 index 000000000000..cb139d2a57be --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DDLWithHiveCatalogTest.scala @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +class DDLWithHiveCatalogTest extends DDLWithHiveCatalogTestBase {} + +class DefaultDatabaseTest extends DefaultDatabaseTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DataFrameWriteTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DataFrameWriteTest.scala new file mode 100644 index 000000000000..6170e2fd6c5c --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DataFrameWriteTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.spark.sql + +class DataFrameWriteTest extends DataFrameWriteTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DeleteFromTableTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DeleteFromTableTest.scala new file mode 100644 index 000000000000..ab33a40e5966 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DeleteFromTableTest.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +import org.apache.spark.SparkConf + +class DeleteFromTableTest extends DeleteFromTableTestBase {} + +class V2DeleteFromTableTest extends DeleteFromTableTestBase { + override protected def sparkConf: SparkConf = { + super.sparkConf.set("spark.paimon.write.use-v2-write", "true") + } +} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DescribeTableTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DescribeTableTest.scala new file mode 100644 index 000000000000..c6aa77419241 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DescribeTableTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +class DescribeTableTest extends DescribeTableTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/FormatTableTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/FormatTableTest.scala new file mode 100644 index 000000000000..ba49976ab6c0 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/FormatTableTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +class FormatTableTest extends FormatTableTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/InsertOverwriteTableTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/InsertOverwriteTableTest.scala new file mode 100644 index 000000000000..4f66584c303b --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/InsertOverwriteTableTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +class InsertOverwriteTableTest extends InsertOverwriteTableTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/MergeIntoTableTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/MergeIntoTableTest.scala new file mode 100644 index 000000000000..b9a85b147eea --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/MergeIntoTableTest.scala @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.spark.sql + +import org.apache.paimon.spark.{PaimonAppendBucketedTableTest, PaimonAppendNonBucketTableTest, PaimonPrimaryKeyBucketedTableTest, PaimonPrimaryKeyNonBucketTableTest} + +class MergeIntoPrimaryKeyBucketedTableTest + extends MergeIntoTableTestBase + with MergeIntoPrimaryKeyTableTest + with MergeIntoNotMatchedBySourceTest + with PaimonPrimaryKeyBucketedTableTest {} + +class MergeIntoPrimaryKeyNonBucketTableTest + extends MergeIntoTableTestBase + with MergeIntoPrimaryKeyTableTest + with MergeIntoNotMatchedBySourceTest + with PaimonPrimaryKeyNonBucketTableTest {} + +class MergeIntoAppendBucketedTableTest + extends MergeIntoTableTestBase + with MergeIntoAppendTableTest + with MergeIntoNotMatchedBySourceTest + with PaimonAppendBucketedTableTest {} + +class MergeIntoAppendNonBucketedTableTest + extends MergeIntoTableTestBase + with MergeIntoAppendTableTest + with MergeIntoNotMatchedBySourceTest + with PaimonAppendNonBucketTableTest {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonCompositePartitionKeyTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonCompositePartitionKeyTest.scala new file mode 100644 index 000000000000..635185a9ed0e --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonCompositePartitionKeyTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +class PaimonCompositePartitionKeyTest extends PaimonCompositePartitionKeyTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonOptimizationTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonOptimizationTest.scala new file mode 100644 index 000000000000..ec140a89bbd3 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonOptimizationTest.scala @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.expressions.{Attribute, GetStructField, NamedExpression, ScalarSubquery} +import org.apache.spark.sql.paimon.shims.SparkShimLoader + +class PaimonOptimizationTest extends PaimonOptimizationTestBase { + + override def extractorExpression( + cteIndex: Int, + output: Seq[Attribute], + fieldIndex: Int): NamedExpression = { + GetStructField( + ScalarSubquery( + SparkShimLoader.shim + .createCTERelationRef(cteIndex, resolved = true, output.toSeq, isStreaming = false)), + fieldIndex, + None) + .as("scalarsubquery()") + } +} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonPushDownTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonPushDownTest.scala new file mode 100644 index 000000000000..26677d85c71a --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonPushDownTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +class PaimonPushDownTest extends PaimonPushDownTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonV1FunctionTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonV1FunctionTest.scala new file mode 100644 index 000000000000..f37fbad27033 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonV1FunctionTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.spark.sql + +class PaimonV1FunctionTest extends PaimonV1FunctionTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonViewTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonViewTest.scala new file mode 100644 index 000000000000..6ab8a2671b51 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonViewTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +class PaimonViewTest extends PaimonViewTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/RewriteUpsertTableTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/RewriteUpsertTableTest.scala new file mode 100644 index 000000000000..412aa3b30351 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/RewriteUpsertTableTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +class RewriteUpsertTableTest extends RewriteUpsertTableTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/RowTrackingTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/RowTrackingTest.scala new file mode 100644 index 000000000000..9f96840a7788 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/RowTrackingTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +class RowTrackingTest extends RowTrackingTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/ShowColumnsTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/ShowColumnsTest.scala new file mode 100644 index 000000000000..6601dc2fca37 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/ShowColumnsTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +class ShowColumnsTest extends PaimonShowColumnsTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/SparkV2FilterConverterTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/SparkV2FilterConverterTest.scala new file mode 100644 index 000000000000..21c4c8a495ed --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/SparkV2FilterConverterTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.spark.sql + +class SparkV2FilterConverterTest extends SparkV2FilterConverterTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/TagDdlTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/TagDdlTest.scala new file mode 100644 index 000000000000..92309d54167b --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/TagDdlTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +class TagDdlTest extends PaimonTagDdlTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/UpdateTableTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/UpdateTableTest.scala new file mode 100644 index 000000000000..194aab278c0e --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/UpdateTableTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +class UpdateTableTest extends UpdateTableTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/VariantTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/VariantTest.scala new file mode 100644 index 000000000000..aafd1dc4b967 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/VariantTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +class VariantTest extends VariantTestBase {} diff --git a/pom.xml b/pom.xml index 600a65c3c08a..449bb0d881e6 100644 --- a/pom.xml +++ b/pom.xml @@ -425,6 +425,7 @@ under the License. paimon-spark/paimon-spark4-common paimon-spark/paimon-spark-4.0 + paimon-spark/paimon-spark-4.1 17 From a4115a61a3a6d9c39901791301084702c5bc4f06 Mon Sep 17 00:00:00 2001 From: sychen Date: Wed, 24 Dec 2025 12:17:17 +0800 Subject: [PATCH 02/35] support spark 4.1.0 --- pom.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 449bb0d881e6..2840e12f243d 100644 --- a/pom.xml +++ b/pom.xml @@ -432,11 +432,11 @@ under the License. 4.13.1 2.13 ${scala213.version} - 4.0.1 + 4.1.0 paimon-spark4-common_2.13 18.1.0 - 4.0 - 4.0.1 + 4.1 + 4.1.0 From 3f432edc5d2e5d52b0d3f46fd31d058fe9dbc185 Mon Sep 17 00:00:00 2001 From: sychen Date: Wed, 24 Dec 2025 18:07:46 +0800 Subject: [PATCH 03/35] createFileIndex --- .../sql/paimon/shims/MinorVersionShim.scala | 102 ++++++++++++++++ .../sql/execution/SparkFormatTable.scala | 69 +---------- .../spark/sql/paimon/shims/SparkShim.scala | 9 ++ .../sql/paimon/shims/MinorVersionShim.scala | 76 ++++++++++++ .../spark/sql/paimon/shims/Spark3Shim.scala | 20 ++++ .../sql/paimon/shims/MinorVersionShim.scala | 109 ++++++++++++++++++ .../spark/sql/paimon/shims/Spark4Shim.scala | 68 +++++++---- 7 files changed, 367 insertions(+), 86 deletions(-) create mode 100644 paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala create mode 100644 paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala diff --git a/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala b/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala new file mode 100644 index 000000000000..eaeca72532ee --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
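For orientation, a minimal caller-side sketch (illustrative only, not part of the patch series) of the shim entry point this commit introduces: version-agnostic code such as SparkFormatTable asks SparkShimLoader.shim for a PartitioningAwareFileIndex instead of constructing one directly. The object name, the single root path, the empty options map and the "dt" partition column are invented for the example.

import java.util.Collections

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex
import org.apache.spark.sql.paimon.shims.SparkShimLoader
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.util.CaseInsensitiveStringMap

object FileIndexUsageSketch {
  // Hypothetical helper: resolve one root path into a partition-aware file index,
  // letting the shim choose the Spark 3 / 4.0 / 4.1 implementation at runtime.
  def indexFor(spark: SparkSession, rootPath: String): PartitioningAwareFileIndex = {
    val options = new CaseInsensitiveStringMap(Collections.emptyMap[String, String]())
    val partitionSchema = StructType(Seq(StructField("dt", StringType)))
    SparkShimLoader.shim.createFileIndex(
      options,
      spark,
      Seq(rootPath),
      None, // userSpecifiedSchema: let the index infer the data schema
      partitionSchema)
  }
}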
+ */ + +package org.apache.spark.sql.paimon.shims + +import org.apache.hadoop.fs.Path +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.execution.datasources._ +import org.apache.spark.sql.execution.streaming.runtime.MetadataLogFileIndex +import org.apache.spark.sql.execution.streaming.sinks.FileStreamSink +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.CaseInsensitiveStringMap + +import scala.collection.JavaConverters._ + +object MinorVersionShim { + + + def createFileIndex( + options: CaseInsensitiveStringMap, + sparkSession: SparkSession, + paths: Seq[String], + userSpecifiedSchema: Option[StructType], + partitionSchema: StructType): PartitioningAwareFileIndex = { + + + class PartitionedMetadataLogFileIndex( + sparkSession: SparkSession, + path: Path, + parameters: Map[String, String], + userSpecifiedSchema: Option[StructType], + override val partitionSchema: StructType) + extends MetadataLogFileIndex(sparkSession, path, parameters, userSpecifiedSchema) + + class PartitionedInMemoryFileIndex( + sparkSession: SparkSession, + rootPathsSpecified: Seq[Path], + parameters: Map[String, String], + userSpecifiedSchema: Option[StructType], + fileStatusCache: FileStatusCache = NoopCache, + userSpecifiedPartitionSpec: Option[PartitionSpec] = None, + metadataOpsTimeNs: Option[Long] = None, + override val partitionSchema: StructType) + extends InMemoryFileIndex( + sparkSession, + rootPathsSpecified, + parameters, + userSpecifiedSchema, + fileStatusCache, + userSpecifiedPartitionSpec, + metadataOpsTimeNs) + + + def globPaths: Boolean = { + val entry = options.get(DataSource.GLOB_PATHS_KEY) + Option(entry).forall(_ == "true") + } + + val caseSensitiveMap = options.asCaseSensitiveMap.asScala.toMap + val hadoopConf = sparkSession.sessionState.newHadoopConfWithOptions(caseSensitiveMap) + if (FileStreamSink.hasMetadata(paths, hadoopConf, sparkSession.sessionState.conf)) { + new PartitionedMetadataLogFileIndex( + sparkSession, + new Path(paths.head), + options.asScala.toMap, + userSpecifiedSchema, + partitionSchema = partitionSchema) + } else { + val rootPathsSpecified = DataSource.checkAndGlobPathIfNecessary( + paths, + hadoopConf, + checkEmptyGlobPath = true, + checkFilesExist = true, + enableGlobbing = globPaths) + val fileStatusCache = FileStatusCache.getOrCreate(sparkSession) + + new PartitionedInMemoryFileIndex( + sparkSession, + rootPathsSpecified, + caseSensitiveMap, + userSpecifiedSchema, + fileStatusCache, + partitionSchema = partitionSchema) + } + } + +} diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/execution/SparkFormatTable.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/execution/SparkFormatTable.scala index 2cb0101653af..776b17401d14 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/execution/SparkFormatTable.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/execution/SparkFormatTable.scala @@ -33,6 +33,7 @@ import org.apache.spark.sql.execution.datasources.v2.orc.OrcTable import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetTable import org.apache.spark.sql.execution.datasources.v2.text.{TextScanBuilder, TextTable} import org.apache.spark.sql.execution.streaming.{FileStreamSink, MetadataLogFileIndex} +import org.apache.spark.sql.paimon.shims.SparkShimLoader import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -50,71 +51,13 @@ object 
SparkFormatTable { paths: Seq[String], userSpecifiedSchema: Option[StructType], partitionSchema: StructType): PartitioningAwareFileIndex = { - - def globPaths: Boolean = { - val entry = options.get(DataSource.GLOB_PATHS_KEY) - Option(entry).forall(_ == "true") - } - - val caseSensitiveMap = options.asCaseSensitiveMap.asScala.toMap - // Hadoop Configurations are case-sensitive. - val hadoopConf = sparkSession.sessionState.newHadoopConfWithOptions(caseSensitiveMap) - if (FileStreamSink.hasMetadata(paths, hadoopConf, sparkSession.sessionState.conf)) { - // We are reading from the results of a streaming query. We will load files from - // the metadata log instead of listing them using HDFS APIs. - new PartitionedMetadataLogFileIndex( - sparkSession, - new Path(paths.head), - options.asScala.toMap, - userSpecifiedSchema, - partitionSchema = partitionSchema) - } else { - // This is a non-streaming file based datasource. - val rootPathsSpecified = DataSource.checkAndGlobPathIfNecessary( - paths, - hadoopConf, - checkEmptyGlobPath = true, - checkFilesExist = true, - enableGlobbing = globPaths) - val fileStatusCache = FileStatusCache.getOrCreate(sparkSession) - - new PartitionedInMemoryFileIndex( - sparkSession, - rootPathsSpecified, - caseSensitiveMap, - userSpecifiedSchema, - fileStatusCache, - partitionSchema = partitionSchema) - } - } - - // Extend from MetadataLogFileIndex to override partitionSchema - private class PartitionedMetadataLogFileIndex( - sparkSession: SparkSession, - path: Path, - parameters: Map[String, String], - userSpecifiedSchema: Option[StructType], - override val partitionSchema: StructType) - extends MetadataLogFileIndex(sparkSession, path, parameters, userSpecifiedSchema) - - // Extend from InMemoryFileIndex to override partitionSchema - private class PartitionedInMemoryFileIndex( - sparkSession: SparkSession, - rootPathsSpecified: Seq[Path], - parameters: Map[String, String], - userSpecifiedSchema: Option[StructType], - fileStatusCache: FileStatusCache = NoopCache, - userSpecifiedPartitionSpec: Option[PartitionSpec] = None, - metadataOpsTimeNs: Option[Long] = None, - override val partitionSchema: StructType) - extends InMemoryFileIndex( + SparkShimLoader.shim.createFileIndex( + options, sparkSession, - rootPathsSpecified, - parameters, + paths, userSpecifiedSchema, - fileStatusCache, - userSpecifiedPartitionSpec, - metadataOpsTimeNs) + partitionSchema) + } } trait PartitionedFormatTable extends SupportsPartitionManagement { diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/paimon/shims/SparkShim.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/paimon/shims/SparkShim.scala index 98296a400672..7e0b55297ab9 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/paimon/shims/SparkShim.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/paimon/shims/SparkShim.scala @@ -32,7 +32,9 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.util.ArrayData import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog} import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.CaseInsensitiveStringMap import java.util.{Map => JMap} @@ -98,4 +100,11 @@ trait SparkShim { def isSparkVariantType(dataType: org.apache.spark.sql.types.DataType): Boolean def 
SparkVariantType(): org.apache.spark.sql.types.DataType + + def createFileIndex( + options: CaseInsensitiveStringMap, + sparkSession: SparkSession, + paths: Seq[String], + userSpecifiedSchema: Option[StructType], + partitionSchema: StructType): PartitioningAwareFileIndex } diff --git a/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala index c069c02dc5b7..bcad29cc27d7 100644 --- a/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala +++ b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala @@ -18,8 +18,17 @@ package org.apache.spark.sql.paimon.shims +import org.apache.hadoop.fs.Path +import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} import org.apache.spark.sql.catalyst.plans.logical.{CTERelationRef, LogicalPlan, MergeAction, MergeIntoTable} +import org.apache.spark.sql.execution.datasources.{DataSource, FileStatusCache, InMemoryFileIndex, NoopCache, PartitioningAwareFileIndex, PartitionSpec} +import org.apache.spark.sql.execution.streaming.runtime.MetadataLogFileIndex +import org.apache.spark.sql.execution.streaming.sinks.FileStreamSink +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.CaseInsensitiveStringMap + +import scala.collection.JavaConverters._ object MinorVersionShim { @@ -44,4 +53,71 @@ object MinorVersionShim { notMatchedActions, notMatchedBySourceActions) } + + def createFileIndex( + options: CaseInsensitiveStringMap, + sparkSession: SparkSession, + paths: Seq[String], + userSpecifiedSchema: Option[StructType], + partitionSchema: StructType): PartitioningAwareFileIndex = { + + class PartitionedMetadataLogFileIndex( + sparkSession: SparkSession, + path: Path, + parameters: Map[String, String], + userSpecifiedSchema: Option[StructType], + override val partitionSchema: StructType) + extends MetadataLogFileIndex(sparkSession, path, parameters, userSpecifiedSchema) + + class PartitionedInMemoryFileIndex( + sparkSession: SparkSession, + rootPathsSpecified: Seq[Path], + parameters: Map[String, String], + userSpecifiedSchema: Option[StructType], + fileStatusCache: FileStatusCache = NoopCache, + userSpecifiedPartitionSpec: Option[PartitionSpec] = None, + metadataOpsTimeNs: Option[Long] = None, + override val partitionSchema: StructType) + extends InMemoryFileIndex( + sparkSession, + rootPathsSpecified, + parameters, + userSpecifiedSchema, + fileStatusCache, + userSpecifiedPartitionSpec, + metadataOpsTimeNs) + + def globPaths: Boolean = { + val entry = options.get(DataSource.GLOB_PATHS_KEY) + Option(entry).forall(_ == "true") + } + + val caseSensitiveMap = options.asCaseSensitiveMap.asScala.toMap + val hadoopConf = sparkSession.sessionState.newHadoopConfWithOptions(caseSensitiveMap) + if (FileStreamSink.hasMetadata(paths, hadoopConf, sparkSession.sessionState.conf)) { + new PartitionedMetadataLogFileIndex( + sparkSession, + new Path(paths.head), + options.asScala.toMap, + userSpecifiedSchema, + partitionSchema = partitionSchema) + } else { + val rootPathsSpecified = DataSource.checkAndGlobPathIfNecessary( + paths, + hadoopConf, + checkEmptyGlobPath = true, + checkFilesExist = true, + enableGlobbing = globPaths) + val fileStatusCache = FileStatusCache.getOrCreate(sparkSession) + + new PartitionedInMemoryFileIndex( + sparkSession, 
+ rootPathsSpecified, + caseSensitiveMap, + userSpecifiedSchema, + fileStatusCache, + partitionSchema = partitionSchema) + } + } + } diff --git a/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala index 70011e14c3c2..272456a19aff 100644 --- a/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala +++ b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala @@ -24,6 +24,7 @@ import org.apache.paimon.spark.catalyst.parser.extensions.PaimonSpark3SqlExtensi import org.apache.paimon.spark.data.{Spark3ArrayData, Spark3InternalRow, Spark3InternalRowWithBlob, SparkArrayData, SparkInternalRow} import org.apache.paimon.types.{DataType, RowType} +import org.apache.hadoop.fs.Path import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} @@ -34,10 +35,15 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.util.ArrayData import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog} import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.execution.datasources._ +import org.apache.spark.sql.execution.streaming.{FileStreamSink, MetadataLogFileIndex} import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.CaseInsensitiveStringMap import java.util.{Map => JMap} +import scala.collection.JavaConverters._ + class Spark3Shim extends SparkShim { override def classicApi: ClassicApi = new Classic3Api @@ -120,4 +126,18 @@ class Spark3Shim extends SparkShim { override def toPaimonVariant(array: ArrayData, pos: Int): Variant = throw new UnsupportedOperationException() + + override def createFileIndex( + options: CaseInsensitiveStringMap, + sparkSession: SparkSession, + paths: Seq[String], + userSpecifiedSchema: Option[StructType], + partitionSchema: StructType): PartitioningAwareFileIndex = { + MinorVersionShim.createFileIndex( + options, + sparkSession, + paths, + userSpecifiedSchema, + partitionSchema) + } } diff --git a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala new file mode 100644 index 000000000000..d9b3411dfefc --- /dev/null +++ b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
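To make the globbing behaviour in these createFileIndex copies concrete, a hedged one-method sketch (the object and method names are invented; it is placed in the shims package only so the example compiles with the same visibility as the code above): an absent DataSource.GLOB_PATHS_KEY option means the supplied paths are globbed, and once the option is present only the literal string "true" keeps globbing enabled.

package org.apache.spark.sql.paimon.shims

import org.apache.spark.sql.execution.datasources.DataSource
import org.apache.spark.sql.util.CaseInsensitiveStringMap

object GlobPathsSketch {
  // Option(null) => None => forall is vacuously true (glob by default);
  // any present value other than "true" disables globbing.
  def globPathsEnabled(options: CaseInsensitiveStringMap): Boolean =
    Option(options.get(DataSource.GLOB_PATHS_KEY)).forall(_ == "true")
}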
+ */ + +package org.apache.spark.sql.paimon.shims + +import org.apache.hadoop.fs.Path +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} +import org.apache.spark.sql.catalyst.plans.logical.{CTERelationRef, LogicalPlan, MergeAction, MergeIntoTable} +import org.apache.spark.sql.execution.datasources.{DataSource, FileStatusCache, InMemoryFileIndex, NoopCache, PartitionSpec, PartitioningAwareFileIndex} +import org.apache.spark.sql.execution.streaming.runtime.MetadataLogFileIndex +import org.apache.spark.sql.execution.streaming.sinks.FileStreamSink +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.CaseInsensitiveStringMap + + +import org.apache.hadoop.fs.Path + +import java.util.{Map => JMap} + +import scala.collection.JavaConverters._ + +object MinorVersionShim { + + + def createFileIndex( + options: CaseInsensitiveStringMap, + sparkSession: SparkSession, + paths: Seq[String], + userSpecifiedSchema: Option[StructType], + partitionSchema: StructType): PartitioningAwareFileIndex ={ + + + class PartitionedMetadataLogFileIndex( + sparkSession: SparkSession, + path: Path, + parameters: Map[String, String], + userSpecifiedSchema: Option[StructType], + override val partitionSchema: StructType) + extends MetadataLogFileIndex(sparkSession, path, parameters, userSpecifiedSchema) + + class PartitionedInMemoryFileIndex( + sparkSession: SparkSession, + rootPathsSpecified: Seq[Path], + parameters: Map[String, String], + userSpecifiedSchema: Option[StructType], + fileStatusCache: FileStatusCache = NoopCache, + userSpecifiedPartitionSpec: Option[PartitionSpec] = None, + metadataOpsTimeNs: Option[Long] = None, + override val partitionSchema: StructType) + extends InMemoryFileIndex( + sparkSession, + rootPathsSpecified, + parameters, + userSpecifiedSchema, + fileStatusCache, + userSpecifiedPartitionSpec, + metadataOpsTimeNs) + + + def globPaths: Boolean = { + val entry = options.get(DataSource.GLOB_PATHS_KEY) + Option(entry).forall(_ == "true") + } + + val caseSensitiveMap = options.asCaseSensitiveMap.asScala.toMap + val hadoopConf = sparkSession.sessionState.newHadoopConfWithOptions(caseSensitiveMap) + if (FileStreamSink.hasMetadata(paths, hadoopConf, sparkSession.sessionState.conf)) { + new PartitionedMetadataLogFileIndex( + sparkSession, + new Path(paths.head), + options.asScala.toMap, + userSpecifiedSchema, + partitionSchema = partitionSchema) + } else { + val rootPathsSpecified = DataSource.checkAndGlobPathIfNecessary( + paths, + hadoopConf, + checkEmptyGlobPath = true, + checkFilesExist = true, + enableGlobbing = globPaths) + val fileStatusCache = FileStatusCache.getOrCreate(sparkSession) + + new PartitionedInMemoryFileIndex( + sparkSession, + rootPathsSpecified, + caseSensitiveMap, + userSpecifiedSchema, + fileStatusCache, + partitionSchema = partitionSchema) + } + } + +} diff --git a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala index ad36acfb26a9..5cebece94091 100644 --- a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala +++ b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala @@ -34,11 +34,18 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.util.ArrayData import org.apache.spark.sql.connector.catalog.{CatalogV2Util, 
Identifier, Table, TableCatalog} import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.execution.datasources._ +import org.apache.spark.sql.execution.streaming.{FileStreamSink, MetadataLogFileIndex} import org.apache.spark.sql.types.{DataTypes, StructType, VariantType} +import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.unsafe.types.VariantVal +import org.apache.hadoop.fs.Path + import java.util.{Map => JMap} +import scala.jdk.CollectionConverters._ + class Spark4Shim extends SparkShim { override def classicApi: ClassicApi = new Classic4Api @@ -56,9 +63,9 @@ class Spark4Shim extends SparkShim { } override def createSparkInternalRowWithBlob( - rowType: RowType, - blobFieldIndex: Int, - blobAsDescriptor: Boolean): SparkInternalRow = { + rowType: RowType, + blobFieldIndex: Int, + blobAsDescriptor: Boolean): SparkInternalRow = { new Spark4InternalRowWithBlob(rowType, blobFieldIndex, blobAsDescriptor) } @@ -67,42 +74,42 @@ class Spark4Shim extends SparkShim { } override def createTable( - tableCatalog: TableCatalog, - ident: Identifier, - schema: StructType, - partitions: Array[Transform], - properties: JMap[String, String]): Table = { + tableCatalog: TableCatalog, + ident: Identifier, + schema: StructType, + partitions: Array[Transform], + properties: JMap[String, String]): Table = { val columns = CatalogV2Util.structTypeToV2Columns(schema) tableCatalog.createTable(ident, columns, partitions, properties) } override def createCTERelationRef( - cteId: Long, - resolved: Boolean, - output: Seq[Attribute], - isStreaming: Boolean): CTERelationRef = { + cteId: Long, + resolved: Boolean, + output: Seq[Attribute], + isStreaming: Boolean): CTERelationRef = { CTERelationRef(cteId, resolved, output.toSeq, isStreaming) } override def supportsHashAggregate( - aggregateBufferAttributes: Seq[Attribute], - groupingExpression: Seq[Expression]): Boolean = { + aggregateBufferAttributes: Seq[Attribute], + groupingExpression: Seq[Expression]): Boolean = { Aggregate.supportsHashAggregate(aggregateBufferAttributes.toSeq, groupingExpression.toSeq) } override def supportsObjectHashAggregate( - aggregateExpressions: Seq[AggregateExpression], - groupByExpressions: Seq[Expression]): Boolean = + aggregateExpressions: Seq[AggregateExpression], + groupByExpressions: Seq[Expression]): Boolean = Aggregate.supportsObjectHashAggregate(aggregateExpressions.toSeq, groupByExpressions.toSeq) override def createMergeIntoTable( - targetTable: LogicalPlan, - sourceTable: LogicalPlan, - mergeCondition: Expression, - matchedActions: Seq[MergeAction], - notMatchedActions: Seq[MergeAction], - notMatchedBySourceActions: Seq[MergeAction], - withSchemaEvolution: Boolean): MergeIntoTable = { + targetTable: LogicalPlan, + sourceTable: LogicalPlan, + mergeCondition: Expression, + matchedActions: Seq[MergeAction], + notMatchedActions: Seq[MergeAction], + notMatchedBySourceActions: Seq[MergeAction], + withSchemaEvolution: Boolean): MergeIntoTable = { MergeIntoTable( targetTable, sourceTable, @@ -132,4 +139,19 @@ class Spark4Shim extends SparkShim { dataType.isInstanceOf[VariantType] override def SparkVariantType(): org.apache.spark.sql.types.DataType = DataTypes.VariantType + + override def createFileIndex( + options: CaseInsensitiveStringMap, + sparkSession: SparkSession, + paths: Seq[String], + userSpecifiedSchema: Option[StructType], + partitionSchema: StructType): PartitioningAwareFileIndex = { + MinorVersionShim.createFileIndex( + options, + sparkSession, + paths, + 
userSpecifiedSchema, + partitionSchema) + } } + From 5bd65db65d3a72c150a87490d61ec8c4d07af2c7 Mon Sep 17 00:00:00 2001 From: sychen Date: Wed, 24 Dec 2025 18:45:49 +0800 Subject: [PATCH 04/35] match --- .../spark/catalyst/analysis/PaimonMergeIntoBase.scala | 4 ++-- .../catalyst/analysis/PaimonMergeIntoResolver.scala | 4 +++- .../analysis/PaimonMergeIntoResolverBase.scala | 4 +++- .../spark/catalyst/analysis/PaimonRelation.scala | 10 ++++++---- .../paimon/spark/commands/MergeIntoPaimonTable.scala | 3 ++- .../org/apache/spark/sql/paimon/shims/Spark3Shim.scala | 4 ---- 6 files changed, 16 insertions(+), 13 deletions(-) diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonMergeIntoBase.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonMergeIntoBase.scala index 8a52273eeab2..246f6936537b 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonMergeIntoBase.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonMergeIntoBase.scala @@ -106,8 +106,8 @@ trait PaimonMergeIntoBase dataEvolutionEnabled: Boolean): MergeAction = { action match { case d @ DeleteAction(_) => d - case u @ UpdateAction(_, assignments) => - u.copy(assignments = alignAssignments(targetOutput, assignments)) + case u: UpdateAction => + u.copy(assignments = alignAssignments(targetOutput, u.assignments)) case i @ InsertAction(_, assignments) => i.copy(assignments = alignAssignments(targetOutput, assignments)) diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonMergeIntoResolver.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonMergeIntoResolver.scala index 78ee8ec2171c..04c996136cf1 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonMergeIntoResolver.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonMergeIntoResolver.scala @@ -33,7 +33,9 @@ object PaimonMergeIntoResolver extends PaimonMergeIntoResolverBase { // The condition must be from the target table val resolvedCond = condition.map(resolveCondition(resolve, _, merge, TARGET_ONLY)) DeleteAction(resolvedCond) - case UpdateAction(condition, assignments) => + case u: UpdateAction => + val condition = u.condition + val assignments = u.assignments // The condition and value must be from the target table val resolvedCond = condition.map(resolveCondition(resolve, _, merge, TARGET_ONLY)) val resolvedAssignments = resolveAssignments(resolve, assignments, merge, TARGET_ONLY) diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonMergeIntoResolverBase.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonMergeIntoResolverBase.scala index 218fc9c0f3ef..aff4ba191f60 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonMergeIntoResolverBase.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonMergeIntoResolverBase.scala @@ -58,7 +58,9 @@ trait PaimonMergeIntoResolverBase extends ExpressionHelper { // The condition can be from both target and source tables val resolvedCond = condition.map(resolveCondition(resolve, _, merge, ALL)) 
DeleteAction(resolvedCond) - case UpdateAction(condition, assignments) => + case u: UpdateAction => + val condition = u.condition + val assignments = u.assignments // The condition and value can be from both target and source tables val resolvedCond = condition.map(resolveCondition(resolve, _, merge, ALL)) val resolvedAssignments = resolveAssignments(resolve, assignments, merge, ALL) diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonRelation.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonRelation.scala index c362ca67c792..0ba17e2006cb 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonRelation.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonRelation.scala @@ -32,8 +32,10 @@ object PaimonRelation extends Logging { def unapply(plan: LogicalPlan): Option[SparkTable] = EliminateSubqueryAliases(plan) match { - case Project(_, DataSourceV2Relation(table: SparkTable, _, _, _, _)) => Some(table) - case DataSourceV2Relation(table: SparkTable, _, _, _, _) => Some(table) + case Project(_, d: DataSourceV2Relation) if d.table.isInstanceOf[SparkTable] => + Some(d.table.asInstanceOf[SparkTable]) + case d: DataSourceV2Relation if d.table.isInstanceOf[SparkTable] => + Some(d.table.asInstanceOf[SparkTable]) case ResolvedTable(_, _, table: SparkTable, _) => Some(table) case _ => None } @@ -50,8 +52,8 @@ object PaimonRelation extends Logging { def getPaimonRelation(plan: LogicalPlan): DataSourceV2Relation = { EliminateSubqueryAliases(plan) match { - case Project(_, d @ DataSourceV2Relation(_: SparkTable, _, _, _, _)) => d - case d @ DataSourceV2Relation(_: SparkTable, _, _, _, _) => d + case Project(_, d: DataSourceV2Relation) if d.table.isInstanceOf[SparkTable] => d + case d: DataSourceV2Relation if d.table.isInstanceOf[SparkTable] => d case _ => throw new RuntimeException(s"It's not a paimon table, $plan") } } diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/MergeIntoPaimonTable.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/MergeIntoPaimonTable.scala index d956a9472f11..f555c464e322 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/MergeIntoPaimonTable.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/MergeIntoPaimonTable.scala @@ -253,7 +253,8 @@ case class MergeIntoPaimonTable( def processMergeActions(actions: Seq[MergeAction]): Seq[Seq[Expression]] = { val columnExprs = actions.map { - case UpdateAction(_, assignments) => + case u: UpdateAction => + val assignments = u.assignments var exprs = assignments.map(_.value) if (writeRowTracking) { exprs ++= Seq(rowIdAttr, Literal(null)) diff --git a/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala index 272456a19aff..f7ccb08a76aa 100644 --- a/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala +++ b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala @@ -24,7 +24,6 @@ import org.apache.paimon.spark.catalyst.parser.extensions.PaimonSpark3SqlExtensi import org.apache.paimon.spark.data.{Spark3ArrayData, 
Spark3InternalRow, Spark3InternalRowWithBlob, SparkArrayData, SparkInternalRow} import org.apache.paimon.types.{DataType, RowType} -import org.apache.hadoop.fs.Path import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} @@ -36,14 +35,11 @@ import org.apache.spark.sql.catalyst.util.ArrayData import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.datasources._ -import org.apache.spark.sql.execution.streaming.{FileStreamSink, MetadataLogFileIndex} import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap import java.util.{Map => JMap} -import scala.collection.JavaConverters._ - class Spark3Shim extends SparkShim { override def classicApi: ClassicApi = new Classic3Api From a2033f0a80a5e8059d24320e6a1b8d7375221222 Mon Sep 17 00:00:00 2001 From: sychen Date: Wed, 24 Dec 2025 18:51:01 +0800 Subject: [PATCH 05/35] match --- .../parser/extensions/RewritePaimonFunctionCommands.scala | 2 +- .../scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/RewritePaimonFunctionCommands.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/RewritePaimonFunctionCommands.scala index ddbd9df5ac1b..84e7dfc01c0c 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/RewritePaimonFunctionCommands.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/RewritePaimonFunctionCommands.scala @@ -103,7 +103,7 @@ case class RewritePaimonFunctionCommands(spark: SparkSession) plan.resolveOperatorsUp { case u: UnresolvedWith => u.copy(cteRelations = u.cteRelations.map( - t => (t._1, transformPaimonV1Function(t._2).asInstanceOf[SubqueryAlias]))) + t => t.copy(_1 = t._1, _2 = transformPaimonV1Function(t._2).asInstanceOf[SubqueryAlias]))) case l: LogicalPlan => l.transformExpressionsWithPruning(_.containsAnyPattern(UNRESOLVED_FUNCTION)) { case u: UnresolvedFunction => diff --git a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala index 5cebece94091..d7e54d42dfc5 100644 --- a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala +++ b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala @@ -35,17 +35,12 @@ import org.apache.spark.sql.catalyst.util.ArrayData import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, Table, TableCatalog} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.datasources._ -import org.apache.spark.sql.execution.streaming.{FileStreamSink, MetadataLogFileIndex} import org.apache.spark.sql.types.{DataTypes, StructType, VariantType} import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.unsafe.types.VariantVal -import org.apache.hadoop.fs.Path - import java.util.{Map => JMap} -import scala.jdk.CollectionConverters._ - class Spark4Shim extends SparkShim { override def classicApi: ClassicApi 
= new Classic4Api From 38e0986bbf5d9f39a5c03e2eda35642c728ff199 Mon Sep 17 00:00:00 2001 From: sychen Date: Wed, 24 Dec 2025 20:12:08 +0800 Subject: [PATCH 06/35] match --- .../sql/paimon/shims/MinorVersionShim.scala | 105 ++++++++++++++++++ .../MergePaimonScalarSubqueries.scala | 1 - .../sql/paimon/shims/MinorVersionShim.scala | 52 +++++---- .../MergeIntoPaimonDataEvolutionTable.scala | 26 +++-- .../spark/sql/paimon/shims/SparkShim.scala | 3 + .../sql/paimon/shims/MinorVersionShim.scala | 75 ------------- .../spark/sql/paimon/shims/Spark3Shim.scala | 90 ++++++++++++--- .../paimon/spark/data/Spark4ArrayData.scala | 10 +- .../paimon/spark/data/Spark4InternalRow.scala | 10 +- .../sql/paimon/shims/MinorVersionShim.scala | 90 ++------------- .../spark/sql/paimon/shims/Spark4Shim.scala | 65 ++++++----- 11 files changed, 301 insertions(+), 226 deletions(-) create mode 100644 paimon-spark/paimon-spark-4.0/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala diff --git a/paimon-spark/paimon-spark-4.0/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala b/paimon-spark/paimon-spark-4.0/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala new file mode 100644 index 000000000000..f90297f6235d --- /dev/null +++ b/paimon-spark/paimon-spark-4.0/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
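Two small sketches (illustrative only; object and method names are invented) for this pair of "match" commits. First, the pattern-matching style the series converges on: binding the whole node and reading its fields by name stays source-compatible if a plan node such as UpdateAction or DataSourceV2Relation gains an extra constructor field in a newer Spark release, which appears to be the motivation for the extractor rewrites above.

import org.apache.spark.sql.catalyst.plans.logical.{MergeAction, UpdateAction}

object MatchStyleSketch {
  // Typed bind instead of UpdateAction(condition, assignments): no fixed-arity unapply,
  // so adding a field to UpdateAction does not break this match.
  def assignmentCount(action: MergeAction): Int = action match {
    case u: UpdateAction => u.assignments.size
    case _ => 0
  }
}

Second, the caller side of the createKeep shim added in this commit: MergeIntoPaimonDataEvolutionTable builds its "keep the row as-is" instruction through the shim, which on Spark 3.x/4.0 ignores the context string and builds MergeRows.Keep(condition, output), while the Spark 4.1 shim maps the string to a MergeRows context first. The keepAll helper is invented; the call itself mirrors the patch.

package org.apache.spark.sql.paimon.shims

import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral
import org.apache.spark.sql.catalyst.plans.logical.MergeRows.Instruction

object KeepInstructionSketch {
  // "Keep every row, emitting `output` unchanged" -- the instruction appended after the
  // per-action instructions in the rewritten merge plans.
  def keepAll(output: Seq[Expression]): Instruction =
    SparkShimLoader.shim.createKeep("COPY", TrueLiteral, output)
}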
+ */ + +package org.apache.spark.sql.paimon.shims + +import org.apache.hadoop.fs.Path +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.plans.logical.MergeRows +import org.apache.spark.sql.catalyst.plans.logical.MergeRows.Instruction +import org.apache.spark.sql.execution.datasources._ +import org.apache.spark.sql.execution.streaming.{FileStreamSink, MetadataLogFileIndex} +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.CaseInsensitiveStringMap + +import scala.collection.JavaConverters._ + +object MinorVersionShim { + + def createKeep(context: String, condition: Expression, output: Seq[Expression]): Instruction = { + MergeRows.Keep(condition, output) + } + + def createFileIndex( + options: CaseInsensitiveStringMap, + sparkSession: SparkSession, + paths: Seq[String], + userSpecifiedSchema: Option[StructType], + partitionSchema: StructType): PartitioningAwareFileIndex = { + + class PartitionedMetadataLogFileIndex( + sparkSession: SparkSession, + path: Path, + parameters: Map[String, String], + userSpecifiedSchema: Option[StructType], + override val partitionSchema: StructType) + extends MetadataLogFileIndex(sparkSession, path, parameters, userSpecifiedSchema) + + class PartitionedInMemoryFileIndex( + sparkSession: SparkSession, + rootPathsSpecified: Seq[Path], + parameters: Map[String, String], + userSpecifiedSchema: Option[StructType], + fileStatusCache: FileStatusCache = NoopCache, + userSpecifiedPartitionSpec: Option[PartitionSpec] = None, + metadataOpsTimeNs: Option[Long] = None, + override val partitionSchema: StructType) + extends InMemoryFileIndex( + sparkSession, + rootPathsSpecified, + parameters, + userSpecifiedSchema, + fileStatusCache, + userSpecifiedPartitionSpec, + metadataOpsTimeNs) + + def globPaths: Boolean = { + val entry = options.get(DataSource.GLOB_PATHS_KEY) + Option(entry).forall(_ == "true") + } + + val caseSensitiveMap = options.asCaseSensitiveMap.asScala.toMap + val hadoopConf = sparkSession.sessionState.newHadoopConfWithOptions(caseSensitiveMap) + if (FileStreamSink.hasMetadata(paths, hadoopConf, sparkSession.sessionState.conf)) { + new PartitionedMetadataLogFileIndex( + sparkSession, + new Path(paths.head), + options.asScala.toMap, + userSpecifiedSchema, + partitionSchema = partitionSchema) + } else { + val rootPathsSpecified = DataSource.checkAndGlobPathIfNecessary( + paths, + hadoopConf, + checkEmptyGlobPath = true, + checkFilesExist = true, + enableGlobbing = globPaths) + val fileStatusCache = FileStatusCache.getOrCreate(sparkSession) + + new PartitionedInMemoryFileIndex( + sparkSession, + rootPathsSpecified, + caseSensitiveMap, + userSpecifiedSchema, + fileStatusCache, + partitionSchema = partitionSchema) + } + } + +} diff --git a/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/paimon/spark/catalyst/optimizer/MergePaimonScalarSubqueries.scala b/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/paimon/spark/catalyst/optimizer/MergePaimonScalarSubqueries.scala index 88386e2bfebe..e86195f1af0b 100644 --- a/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/paimon/spark/catalyst/optimizer/MergePaimonScalarSubqueries.scala +++ b/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/paimon/spark/catalyst/optimizer/MergePaimonScalarSubqueries.scala @@ -90,4 +90,3 @@ object MergePaimonScalarSubqueries extends MergePaimonScalarSubqueriesBase { ScalarSubquery(plan, exprId = exprId) } } - diff --git 
a/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala b/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala index eaeca72532ee..381e9dfd656e 100644 --- a/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala +++ b/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala @@ -20,6 +20,9 @@ package org.apache.spark.sql.paimon.shims import org.apache.hadoop.fs.Path import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.plans.logical.MergeRows +import org.apache.spark.sql.catalyst.plans.logical.MergeRows.Instruction import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.streaming.runtime.MetadataLogFileIndex import org.apache.spark.sql.execution.streaming.sinks.FileStreamSink @@ -30,32 +33,42 @@ import scala.collection.JavaConverters._ object MinorVersionShim { + def createKeep(context: String, condition: Expression, output: Seq[Expression]): Instruction = { + val ctx = context match { + case "COPY" => MergeRows.Copy + case "DELETE" => MergeRows.Delete + case "INSERT" => MergeRows.Insert + case "UPDATE" => MergeRows.Update + case _ => MergeRows.Copy + } - def createFileIndex( - options: CaseInsensitiveStringMap, - sparkSession: SparkSession, - paths: Seq[String], - userSpecifiedSchema: Option[StructType], - partitionSchema: StructType): PartitioningAwareFileIndex = { + MergeRows.Keep(ctx, condition, output) + } + def createFileIndex( + options: CaseInsensitiveStringMap, + sparkSession: SparkSession, + paths: Seq[String], + userSpecifiedSchema: Option[StructType], + partitionSchema: StructType): PartitioningAwareFileIndex = { class PartitionedMetadataLogFileIndex( - sparkSession: SparkSession, - path: Path, - parameters: Map[String, String], - userSpecifiedSchema: Option[StructType], - override val partitionSchema: StructType) + sparkSession: SparkSession, + path: Path, + parameters: Map[String, String], + userSpecifiedSchema: Option[StructType], + override val partitionSchema: StructType) extends MetadataLogFileIndex(sparkSession, path, parameters, userSpecifiedSchema) class PartitionedInMemoryFileIndex( - sparkSession: SparkSession, - rootPathsSpecified: Seq[Path], - parameters: Map[String, String], - userSpecifiedSchema: Option[StructType], - fileStatusCache: FileStatusCache = NoopCache, - userSpecifiedPartitionSpec: Option[PartitionSpec] = None, - metadataOpsTimeNs: Option[Long] = None, - override val partitionSchema: StructType) + sparkSession: SparkSession, + rootPathsSpecified: Seq[Path], + parameters: Map[String, String], + userSpecifiedSchema: Option[StructType], + fileStatusCache: FileStatusCache = NoopCache, + userSpecifiedPartitionSpec: Option[PartitionSpec] = None, + metadataOpsTimeNs: Option[Long] = None, + override val partitionSchema: StructType) extends InMemoryFileIndex( sparkSession, rootPathsSpecified, @@ -65,7 +78,6 @@ object MinorVersionShim { userSpecifiedPartitionSpec, metadataOpsTimeNs) - def globPaths: Boolean = { val entry = options.get(DataSource.GLOB_PATHS_KEY) Option(entry).forall(_ == "true") diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/MergeIntoPaimonDataEvolutionTable.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/MergeIntoPaimonDataEvolutionTable.scala index 
e2eaed8fe54f..6f6d7c0cee16 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/MergeIntoPaimonDataEvolutionTable.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/MergeIntoPaimonDataEvolutionTable.scala @@ -36,9 +36,9 @@ import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, Equ import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} import org.apache.spark.sql.catalyst.plans.{LeftAnti, LeftOuter} import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.plans.logical.MergeRows.Keep import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.functions.{col, udf} +import org.apache.spark.sql.paimon.shims.SparkShimLoader import org.apache.spark.sql.types.StructType import scala.collection.JavaConverters._ @@ -316,16 +316,20 @@ case class MergeIntoPaimonDataEvolutionTable( ua.copy(condition = newCond, assignments = newAssignments) } + val shim = SparkShimLoader.shim val mergeRows = MergeRows( isSourceRowPresent = TrueLiteral, isTargetRowPresent = TrueLiteral, matchedInstructions = rewrittenUpdateActions .map( action => { - Keep(action.condition.getOrElse(TrueLiteral), action.assignments.map(a => a.value)) - }) ++ Seq(Keep(TrueLiteral, output)), + shim.createKeep( + "COPY", + action.condition.getOrElse(TrueLiteral), + action.assignments.map(a => a.value)) + }) ++ Seq(shim.createKeep("COPY", TrueLiteral, output)), notMatchedInstructions = Nil, - notMatchedBySourceInstructions = Seq(Keep(TrueLiteral, output)), + notMatchedBySourceInstructions = Seq(shim.createKeep("COPY", TrueLiteral, output)), checkCardinality = false, output = output, child = readPlan @@ -355,16 +359,20 @@ case class MergeIntoPaimonDataEvolutionTable( Join(targetTableProj, sourceTableProj, LeftOuter, Some(matchedCondition), JoinHint.NONE) val rowFromSourceAttr = attribute(ROW_FROM_SOURCE, joinPlan) val rowFromTargetAttr = attribute(ROW_FROM_TARGET, joinPlan) + val shim = SparkShimLoader.shim val mergeRows = MergeRows( isSourceRowPresent = rowFromSourceAttr, isTargetRowPresent = rowFromTargetAttr, matchedInstructions = realUpdateActions .map( action => { - Keep(action.condition.getOrElse(TrueLiteral), action.assignments.map(a => a.value)) - }) ++ Seq(Keep(TrueLiteral, output)), + shim.createKeep( + "COPY", + action.condition.getOrElse(TrueLiteral), + action.assignments.map(a => a.value)) + }) ++ Seq(shim.createKeep("COPY", TrueLiteral, output)), notMatchedInstructions = Nil, - notMatchedBySourceInstructions = Seq(Keep(TrueLiteral, output)).toSeq, + notMatchedBySourceInstructions = Seq(shim.createKeep("COPY", TrueLiteral, output)).toSeq, checkCardinality = false, output = output, child = joinPlan @@ -393,13 +401,15 @@ case class MergeIntoPaimonDataEvolutionTable( Join(sourceRelation, targetReadPlan, LeftAnti, Some(matchedCondition), JoinHint.NONE) // merge rows as there are multiple not matched actions + val shim = SparkShimLoader.shim val mergeRows = MergeRows( isSourceRowPresent = TrueLiteral, isTargetRowPresent = FalseLiteral, matchedInstructions = Nil, notMatchedInstructions = notMatchedActions.map { case insertAction: InsertAction => - Keep( + shim.createKeep( + "COPY", insertAction.condition.getOrElse(TrueLiteral), insertAction.assignments.map( a => diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/paimon/shims/SparkShim.scala 
b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/paimon/shims/SparkShim.scala index 7e0b55297ab9..0dd32a615a52 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/paimon/shims/SparkShim.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/paimon/shims/SparkShim.scala @@ -28,6 +28,7 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.parser.ParserInterface import org.apache.spark.sql.catalyst.plans.logical.{CTERelationRef, LogicalPlan, MergeAction, MergeIntoTable} +import org.apache.spark.sql.catalyst.plans.logical.MergeRows.Instruction import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.util.ArrayData import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog} @@ -90,6 +91,8 @@ trait SparkShim { notMatchedBySourceActions: Seq[MergeAction], withSchemaEvolution: Boolean): MergeIntoTable + def createKeep(context: String, condition: Expression, output: Seq[Expression]): Instruction + // for variant def toPaimonVariant(o: Object): Variant diff --git a/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala index bcad29cc27d7..06de30e4af06 100644 --- a/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala +++ b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala @@ -18,17 +18,8 @@ package org.apache.spark.sql.paimon.shims -import org.apache.hadoop.fs.Path -import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} import org.apache.spark.sql.catalyst.plans.logical.{CTERelationRef, LogicalPlan, MergeAction, MergeIntoTable} -import org.apache.spark.sql.execution.datasources.{DataSource, FileStatusCache, InMemoryFileIndex, NoopCache, PartitioningAwareFileIndex, PartitionSpec} -import org.apache.spark.sql.execution.streaming.runtime.MetadataLogFileIndex -import org.apache.spark.sql.execution.streaming.sinks.FileStreamSink -import org.apache.spark.sql.types.StructType -import org.apache.spark.sql.util.CaseInsensitiveStringMap - -import scala.collection.JavaConverters._ object MinorVersionShim { @@ -54,70 +45,4 @@ object MinorVersionShim { notMatchedBySourceActions) } - def createFileIndex( - options: CaseInsensitiveStringMap, - sparkSession: SparkSession, - paths: Seq[String], - userSpecifiedSchema: Option[StructType], - partitionSchema: StructType): PartitioningAwareFileIndex = { - - class PartitionedMetadataLogFileIndex( - sparkSession: SparkSession, - path: Path, - parameters: Map[String, String], - userSpecifiedSchema: Option[StructType], - override val partitionSchema: StructType) - extends MetadataLogFileIndex(sparkSession, path, parameters, userSpecifiedSchema) - - class PartitionedInMemoryFileIndex( - sparkSession: SparkSession, - rootPathsSpecified: Seq[Path], - parameters: Map[String, String], - userSpecifiedSchema: Option[StructType], - fileStatusCache: FileStatusCache = NoopCache, - userSpecifiedPartitionSpec: Option[PartitionSpec] = None, - metadataOpsTimeNs: Option[Long] = None, - override val partitionSchema: StructType) - extends InMemoryFileIndex( - sparkSession, - rootPathsSpecified, - parameters, - 
userSpecifiedSchema, - fileStatusCache, - userSpecifiedPartitionSpec, - metadataOpsTimeNs) - - def globPaths: Boolean = { - val entry = options.get(DataSource.GLOB_PATHS_KEY) - Option(entry).forall(_ == "true") - } - - val caseSensitiveMap = options.asCaseSensitiveMap.asScala.toMap - val hadoopConf = sparkSession.sessionState.newHadoopConfWithOptions(caseSensitiveMap) - if (FileStreamSink.hasMetadata(paths, hadoopConf, sparkSession.sessionState.conf)) { - new PartitionedMetadataLogFileIndex( - sparkSession, - new Path(paths.head), - options.asScala.toMap, - userSpecifiedSchema, - partitionSchema = partitionSchema) - } else { - val rootPathsSpecified = DataSource.checkAndGlobPathIfNecessary( - paths, - hadoopConf, - checkEmptyGlobPath = true, - checkFilesExist = true, - enableGlobbing = globPaths) - val fileStatusCache = FileStatusCache.getOrCreate(sparkSession) - - new PartitionedInMemoryFileIndex( - sparkSession, - rootPathsSpecified, - caseSensitiveMap, - userSpecifiedSchema, - fileStatusCache, - partitionSchema = partitionSchema) - } - } - } diff --git a/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala index f7ccb08a76aa..50a362ca9dfa 100644 --- a/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala +++ b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala @@ -18,28 +18,33 @@ package org.apache.spark.sql.paimon.shims +import org.apache.hadoop.fs.Path import org.apache.paimon.data.variant.Variant import org.apache.paimon.spark.catalyst.analysis.Spark3ResolutionRules import org.apache.paimon.spark.catalyst.parser.extensions.PaimonSpark3SqlExtensionsParser import org.apache.paimon.spark.data.{Spark3ArrayData, Spark3InternalRow, Spark3InternalRowWithBlob, SparkArrayData, SparkInternalRow} import org.apache.paimon.types.{DataType, RowType} - import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.parser.ParserInterface +import org.apache.spark.sql.catalyst.plans.logical.MergeRows.Instruction import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.util.ArrayData import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.datasources._ +import org.apache.spark.sql.execution.streaming.runtime.MetadataLogFileIndex +import org.apache.spark.sql.execution.streaming.sinks.FileStreamSink import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap import java.util.{Map => JMap} +import scala.collection.JavaConverters._ + class Spark3Shim extends SparkShim { override def classicApi: ClassicApi = new Classic3Api @@ -110,6 +115,13 @@ class Spark3Shim extends SparkShim { notMatchedBySourceActions) } + override def createKeep( + context: String, + condition: Expression, + output: Seq[Expression]): Instruction = { + MergeRows.Keep(condition, output) + } + override def toPaimonVariant(o: Object): Variant = throw new UnsupportedOperationException() override def isSparkVariantType(dataType: 
org.apache.spark.sql.types.DataType): Boolean = false @@ -123,17 +135,69 @@ class Spark3Shim extends SparkShim { override def toPaimonVariant(array: ArrayData, pos: Int): Variant = throw new UnsupportedOperationException() - override def createFileIndex( - options: CaseInsensitiveStringMap, - sparkSession: SparkSession, - paths: Seq[String], - userSpecifiedSchema: Option[StructType], - partitionSchema: StructType): PartitioningAwareFileIndex = { - MinorVersionShim.createFileIndex( - options, - sparkSession, - paths, - userSpecifiedSchema, - partitionSchema) + def createFileIndex( + options: CaseInsensitiveStringMap, + sparkSession: SparkSession, + paths: Seq[String], + userSpecifiedSchema: Option[StructType], + partitionSchema: StructType): PartitioningAwareFileIndex = { + + class PartitionedMetadataLogFileIndex( + sparkSession: SparkSession, + path: Path, + parameters: Map[String, String], + userSpecifiedSchema: Option[StructType], + override val partitionSchema: StructType) + extends MetadataLogFileIndex(sparkSession, path, parameters, userSpecifiedSchema) + + class PartitionedInMemoryFileIndex( + sparkSession: SparkSession, + rootPathsSpecified: Seq[Path], + parameters: Map[String, String], + userSpecifiedSchema: Option[StructType], + fileStatusCache: FileStatusCache = NoopCache, + userSpecifiedPartitionSpec: Option[PartitionSpec] = None, + metadataOpsTimeNs: Option[Long] = None, + override val partitionSchema: StructType) + extends InMemoryFileIndex( + sparkSession, + rootPathsSpecified, + parameters, + userSpecifiedSchema, + fileStatusCache, + userSpecifiedPartitionSpec, + metadataOpsTimeNs) + + def globPaths: Boolean = { + val entry = options.get(DataSource.GLOB_PATHS_KEY) + Option(entry).forall(_ == "true") + } + + val caseSensitiveMap = options.asCaseSensitiveMap.asScala.toMap + val hadoopConf = sparkSession.sessionState.newHadoopConfWithOptions(caseSensitiveMap) + if (FileStreamSink.hasMetadata(paths, hadoopConf, sparkSession.sessionState.conf)) { + new PartitionedMetadataLogFileIndex( + sparkSession, + new Path(paths.head), + options.asScala.toMap, + userSpecifiedSchema, + partitionSchema = partitionSchema) + } else { + val rootPathsSpecified = DataSource.checkAndGlobPathIfNecessary( + paths, + hadoopConf, + checkEmptyGlobPath = true, + checkFilesExist = true, + enableGlobbing = globPaths) + val fileStatusCache = FileStatusCache.getOrCreate(sparkSession) + + new PartitionedInMemoryFileIndex( + sparkSession, + rootPathsSpecified, + caseSensitiveMap, + userSpecifiedSchema, + fileStatusCache, + partitionSchema = partitionSchema) + } } } diff --git a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4ArrayData.scala b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4ArrayData.scala index d8ba2847ab88..af4af1254246 100644 --- a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4ArrayData.scala +++ b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4ArrayData.scala @@ -20,7 +20,7 @@ package org.apache.paimon.spark.data import org.apache.paimon.types.DataType -import org.apache.spark.unsafe.types.VariantVal +import org.apache.spark.unsafe.types.{GeographyVal, GeometryVal, VariantVal} class Spark4ArrayData(override val elementType: DataType) extends AbstractSparkArrayData { @@ -28,4 +28,12 @@ class Spark4ArrayData(override val elementType: DataType) extends AbstractSparkA val v = paimonArray.getVariant(ordinal) new VariantVal(v.value(), v.metadata()) } + 
+ def getGeography(ordinal: Int): GeographyVal = { + throw new UnsupportedOperationException("GeographyVal is not supported") + } + + def getGeometry(ordinal: Int): GeometryVal = { + throw new UnsupportedOperationException("GeographyVal is not supported") + } } diff --git a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRow.scala b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRow.scala index 9ac2766346f9..79606b90221a 100644 --- a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRow.scala +++ b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRow.scala @@ -21,7 +21,7 @@ package org.apache.paimon.spark.data import org.apache.paimon.spark.AbstractSparkInternalRow import org.apache.paimon.types.RowType -import org.apache.spark.unsafe.types.VariantVal +import org.apache.spark.unsafe.types.{GeographyVal, GeometryVal, VariantVal} class Spark4InternalRow(rowType: RowType) extends AbstractSparkInternalRow(rowType) { @@ -29,4 +29,12 @@ class Spark4InternalRow(rowType: RowType) extends AbstractSparkInternalRow(rowTy val v = row.getVariant(i) new VariantVal(v.value(), v.metadata()) } + + def getGeography(ordinal: Int): GeographyVal = { + throw new UnsupportedOperationException("GeographyVal is not supported") + } + + def getGeometry(ordinal: Int): GeometryVal = { + throw new UnsupportedOperationException("GeographyVal is not supported") + } } diff --git a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala index d9b3411dfefc..3953d62b3541 100644 --- a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala +++ b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala @@ -18,92 +18,26 @@ package org.apache.spark.sql.paimon.shims -import org.apache.hadoop.fs.Path import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} -import org.apache.spark.sql.catalyst.plans.logical.{CTERelationRef, LogicalPlan, MergeAction, MergeIntoTable} -import org.apache.spark.sql.execution.datasources.{DataSource, FileStatusCache, InMemoryFileIndex, NoopCache, PartitionSpec, PartitioningAwareFileIndex} -import org.apache.spark.sql.execution.streaming.runtime.MetadataLogFileIndex -import org.apache.spark.sql.execution.streaming.sinks.FileStreamSink +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.plans.logical.MergeRows.Instruction +import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap - -import org.apache.hadoop.fs.Path - -import java.util.{Map => JMap} - -import scala.collection.JavaConverters._ - object MinorVersionShim { + def createKeep(context: String, condition: Expression, output: Seq[Expression]): Instruction = { + null + } def createFileIndex( - options: CaseInsensitiveStringMap, - sparkSession: SparkSession, - paths: Seq[String], - userSpecifiedSchema: Option[StructType], - partitionSchema: StructType): PartitioningAwareFileIndex ={ - - - class PartitionedMetadataLogFileIndex( - sparkSession: SparkSession, - path: Path, - parameters: Map[String, String], - userSpecifiedSchema: 
Option[StructType], - override val partitionSchema: StructType) - extends MetadataLogFileIndex(sparkSession, path, parameters, userSpecifiedSchema) - - class PartitionedInMemoryFileIndex( - sparkSession: SparkSession, - rootPathsSpecified: Seq[Path], - parameters: Map[String, String], - userSpecifiedSchema: Option[StructType], - fileStatusCache: FileStatusCache = NoopCache, - userSpecifiedPartitionSpec: Option[PartitionSpec] = None, - metadataOpsTimeNs: Option[Long] = None, - override val partitionSchema: StructType) - extends InMemoryFileIndex( - sparkSession, - rootPathsSpecified, - parameters, - userSpecifiedSchema, - fileStatusCache, - userSpecifiedPartitionSpec, - metadataOpsTimeNs) - - - def globPaths: Boolean = { - val entry = options.get(DataSource.GLOB_PATHS_KEY) - Option(entry).forall(_ == "true") - } - - val caseSensitiveMap = options.asCaseSensitiveMap.asScala.toMap - val hadoopConf = sparkSession.sessionState.newHadoopConfWithOptions(caseSensitiveMap) - if (FileStreamSink.hasMetadata(paths, hadoopConf, sparkSession.sessionState.conf)) { - new PartitionedMetadataLogFileIndex( - sparkSession, - new Path(paths.head), - options.asScala.toMap, - userSpecifiedSchema, - partitionSchema = partitionSchema) - } else { - val rootPathsSpecified = DataSource.checkAndGlobPathIfNecessary( - paths, - hadoopConf, - checkEmptyGlobPath = true, - checkFilesExist = true, - enableGlobbing = globPaths) - val fileStatusCache = FileStatusCache.getOrCreate(sparkSession) - - new PartitionedInMemoryFileIndex( - sparkSession, - rootPathsSpecified, - caseSensitiveMap, - userSpecifiedSchema, - fileStatusCache, - partitionSchema = partitionSchema) - } + options: CaseInsensitiveStringMap, + sparkSession: SparkSession, + paths: Seq[String], + userSpecifiedSchema: Option[StructType], + partitionSchema: StructType): PartitioningAwareFileIndex = { + null } } diff --git a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala index d7e54d42dfc5..6cee9bdd32ac 100644 --- a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala +++ b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala @@ -30,6 +30,7 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.parser.ParserInterface import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, CTERelationRef, LogicalPlan, MergeAction, MergeIntoTable} +import org.apache.spark.sql.catalyst.plans.logical.MergeRows.Instruction import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.util.ArrayData import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, Table, TableCatalog} @@ -58,9 +59,9 @@ class Spark4Shim extends SparkShim { } override def createSparkInternalRowWithBlob( - rowType: RowType, - blobFieldIndex: Int, - blobAsDescriptor: Boolean): SparkInternalRow = { + rowType: RowType, + blobFieldIndex: Int, + blobAsDescriptor: Boolean): SparkInternalRow = { new Spark4InternalRowWithBlob(rowType, blobFieldIndex, blobAsDescriptor) } @@ -69,42 +70,42 @@ class Spark4Shim extends SparkShim { } override def createTable( - tableCatalog: TableCatalog, - ident: Identifier, - schema: StructType, - partitions: Array[Transform], - properties: JMap[String, String]): Table = 
{ + tableCatalog: TableCatalog, + ident: Identifier, + schema: StructType, + partitions: Array[Transform], + properties: JMap[String, String]): Table = { val columns = CatalogV2Util.structTypeToV2Columns(schema) tableCatalog.createTable(ident, columns, partitions, properties) } override def createCTERelationRef( - cteId: Long, - resolved: Boolean, - output: Seq[Attribute], - isStreaming: Boolean): CTERelationRef = { + cteId: Long, + resolved: Boolean, + output: Seq[Attribute], + isStreaming: Boolean): CTERelationRef = { CTERelationRef(cteId, resolved, output.toSeq, isStreaming) } override def supportsHashAggregate( - aggregateBufferAttributes: Seq[Attribute], - groupingExpression: Seq[Expression]): Boolean = { + aggregateBufferAttributes: Seq[Attribute], + groupingExpression: Seq[Expression]): Boolean = { Aggregate.supportsHashAggregate(aggregateBufferAttributes.toSeq, groupingExpression.toSeq) } override def supportsObjectHashAggregate( - aggregateExpressions: Seq[AggregateExpression], - groupByExpressions: Seq[Expression]): Boolean = + aggregateExpressions: Seq[AggregateExpression], + groupByExpressions: Seq[Expression]): Boolean = Aggregate.supportsObjectHashAggregate(aggregateExpressions.toSeq, groupByExpressions.toSeq) override def createMergeIntoTable( - targetTable: LogicalPlan, - sourceTable: LogicalPlan, - mergeCondition: Expression, - matchedActions: Seq[MergeAction], - notMatchedActions: Seq[MergeAction], - notMatchedBySourceActions: Seq[MergeAction], - withSchemaEvolution: Boolean): MergeIntoTable = { + targetTable: LogicalPlan, + sourceTable: LogicalPlan, + mergeCondition: Expression, + matchedActions: Seq[MergeAction], + notMatchedActions: Seq[MergeAction], + notMatchedBySourceActions: Seq[MergeAction], + withSchemaEvolution: Boolean): MergeIntoTable = { MergeIntoTable( targetTable, sourceTable, @@ -115,6 +116,13 @@ class Spark4Shim extends SparkShim { withSchemaEvolution) } + override def createKeep( + context: String, + condition: Expression, + output: Seq[Expression]): Instruction = { + MinorVersionShim.createKeep(context, condition, output) + } + override def toPaimonVariant(o: Object): Variant = { val v = o.asInstanceOf[VariantVal] new GenericVariant(v.getValue, v.getMetadata) @@ -136,11 +144,11 @@ class Spark4Shim extends SparkShim { override def SparkVariantType(): org.apache.spark.sql.types.DataType = DataTypes.VariantType override def createFileIndex( - options: CaseInsensitiveStringMap, - sparkSession: SparkSession, - paths: Seq[String], - userSpecifiedSchema: Option[StructType], - partitionSchema: StructType): PartitioningAwareFileIndex = { + options: CaseInsensitiveStringMap, + sparkSession: SparkSession, + paths: Seq[String], + userSpecifiedSchema: Option[StructType], + partitionSchema: StructType): PartitioningAwareFileIndex = { MinorVersionShim.createFileIndex( options, sparkSession, @@ -149,4 +157,3 @@ class Spark4Shim extends SparkShim { partitionSchema) } } - From f955ba792856a5027a6411aad9540c29600dd927 Mon Sep 17 00:00:00 2001 From: sychen Date: Wed, 24 Dec 2025 20:13:17 +0800 Subject: [PATCH 07/35] match --- .../spark/sql/paimon/shims/Spark3Shim.scala | 41 ++++++++++--------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala index 50a362ca9dfa..906ebc9d52b3 100644 --- 
a/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala +++ b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala @@ -18,19 +18,20 @@ package org.apache.spark.sql.paimon.shims -import org.apache.hadoop.fs.Path import org.apache.paimon.data.variant.Variant import org.apache.paimon.spark.catalyst.analysis.Spark3ResolutionRules import org.apache.paimon.spark.catalyst.parser.extensions.PaimonSpark3SqlExtensionsParser import org.apache.paimon.spark.data.{Spark3ArrayData, Spark3InternalRow, Spark3InternalRowWithBlob, SparkArrayData, SparkInternalRow} import org.apache.paimon.types.{DataType, RowType} + +import org.apache.hadoop.fs.Path import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.parser.ParserInterface -import org.apache.spark.sql.catalyst.plans.logical.MergeRows.Instruction import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.plans.logical.MergeRows.Instruction import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.util.ArrayData import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog} @@ -136,29 +137,29 @@ class Spark3Shim extends SparkShim { throw new UnsupportedOperationException() def createFileIndex( - options: CaseInsensitiveStringMap, - sparkSession: SparkSession, - paths: Seq[String], - userSpecifiedSchema: Option[StructType], - partitionSchema: StructType): PartitioningAwareFileIndex = { + options: CaseInsensitiveStringMap, + sparkSession: SparkSession, + paths: Seq[String], + userSpecifiedSchema: Option[StructType], + partitionSchema: StructType): PartitioningAwareFileIndex = { class PartitionedMetadataLogFileIndex( - sparkSession: SparkSession, - path: Path, - parameters: Map[String, String], - userSpecifiedSchema: Option[StructType], - override val partitionSchema: StructType) + sparkSession: SparkSession, + path: Path, + parameters: Map[String, String], + userSpecifiedSchema: Option[StructType], + override val partitionSchema: StructType) extends MetadataLogFileIndex(sparkSession, path, parameters, userSpecifiedSchema) class PartitionedInMemoryFileIndex( - sparkSession: SparkSession, - rootPathsSpecified: Seq[Path], - parameters: Map[String, String], - userSpecifiedSchema: Option[StructType], - fileStatusCache: FileStatusCache = NoopCache, - userSpecifiedPartitionSpec: Option[PartitionSpec] = None, - metadataOpsTimeNs: Option[Long] = None, - override val partitionSchema: StructType) + sparkSession: SparkSession, + rootPathsSpecified: Seq[Path], + parameters: Map[String, String], + userSpecifiedSchema: Option[StructType], + fileStatusCache: FileStatusCache = NoopCache, + userSpecifiedPartitionSpec: Option[PartitionSpec] = None, + metadataOpsTimeNs: Option[Long] = None, + override val partitionSchema: StructType) extends InMemoryFileIndex( sparkSession, rootPathsSpecified, From 0f5d3dd527032a34a5670e6a4bceb157d0fe5b36 Mon Sep 17 00:00:00 2001 From: sychen Date: Wed, 24 Dec 2025 20:16:51 +0800 Subject: [PATCH 08/35] match --- .../scala/org/apache/spark/sql/execution/SparkFormatTable.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/execution/SparkFormatTable.scala 
b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/execution/SparkFormatTable.scala index 776b17401d14..94337124a13b 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/execution/SparkFormatTable.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/execution/SparkFormatTable.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.execution import org.apache.paimon.utils.StringUtils -import org.apache.hadoop.fs.Path import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Literal} @@ -32,7 +31,6 @@ import org.apache.spark.sql.execution.datasources.v2.json.JsonTable import org.apache.spark.sql.execution.datasources.v2.orc.OrcTable import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetTable import org.apache.spark.sql.execution.datasources.v2.text.{TextScanBuilder, TextTable} -import org.apache.spark.sql.execution.streaming.{FileStreamSink, MetadataLogFileIndex} import org.apache.spark.sql.paimon.shims.SparkShimLoader import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap From f24164b77c564b48eff1418572b18207f10945a7 Mon Sep 17 00:00:00 2001 From: sychen Date: Wed, 24 Dec 2025 22:23:49 +0800 Subject: [PATCH 09/35] match --- .../scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala index 906ebc9d52b3..12972b5bf439 100644 --- a/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala +++ b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala @@ -37,8 +37,7 @@ import org.apache.spark.sql.catalyst.util.ArrayData import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.datasources._ -import org.apache.spark.sql.execution.streaming.runtime.MetadataLogFileIndex -import org.apache.spark.sql.execution.streaming.sinks.FileStreamSink +import org.apache.spark.sql.execution.streaming.{FileStreamSink,MetadataLogFileIndex} import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap From 16dac27a21e5eec640cd4fe0605b43c6cdb0031d Mon Sep 17 00:00:00 2001 From: sychen Date: Wed, 24 Dec 2025 22:27:45 +0800 Subject: [PATCH 10/35] MemoryStream --- .../scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala | 2 +- .../src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala | 2 +- .../paimon/spark/procedure/AlterBranchProcedureTest.scala | 2 +- .../org/apache/paimon/spark/procedure/BranchProcedureTest.scala | 2 +- .../paimon/spark/procedure/CompactProcedureTestBase.scala | 2 +- .../spark/procedure/CreateAndDeleteTagProcedureTest.scala | 2 +- .../spark/procedure/CreateTagFromTimestampProcedureTest.scala | 2 +- .../paimon/spark/procedure/ExpirePartitionsProcedureTest.scala | 2 +- .../paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala | 2 +- .../apache/paimon/spark/procedure/RollbackProcedureTest.scala | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git 
a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala index e103429559ba..9b9393be7118 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala @@ -19,7 +19,7 @@ package org.apache.paimon.spark import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.execution.streaming.runtime.MemoryStream import org.apache.spark.sql.streaming.StreamTest class PaimonCDCSourceTest extends PaimonSparkTestBase with StreamTest { diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala index c43170d7ba1b..9935288db9a7 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala @@ -22,7 +22,7 @@ import org.apache.paimon.Snapshot.CommitKind._ import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.execution.streaming.runtime.MemoryStream import org.apache.spark.sql.functions.{col, mean, window} import org.apache.spark.sql.streaming.StreamTest diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/AlterBranchProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/AlterBranchProcedureTest.scala index 316c36c40c56..df1df747897d 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/AlterBranchProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/AlterBranchProcedureTest.scala @@ -21,7 +21,7 @@ package org.apache.paimon.spark.procedure import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.execution.streaming.runtime.MemoryStream import org.apache.spark.sql.streaming.StreamTest class AlterBranchProcedureTest extends PaimonSparkTestBase with StreamTest { diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/BranchProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/BranchProcedureTest.scala index 67786a47fe3f..111e604b1ef0 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/BranchProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/BranchProcedureTest.scala @@ -21,7 +21,7 @@ package org.apache.paimon.spark.procedure import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.execution.streaming.runtime.MemoryStream import org.apache.spark.sql.streaming.StreamTest class BranchProcedureTest extends PaimonSparkTestBase with StreamTest { diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTestBase.scala 
b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTestBase.scala index e89eba2e8599..19f6bc25280e 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTestBase.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTestBase.scala @@ -27,7 +27,7 @@ import org.apache.paimon.table.source.DataSplit import org.apache.spark.scheduler.{SparkListener, SparkListenerStageSubmitted} import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.execution.streaming.runtime.MemoryStream import org.apache.spark.sql.streaming.StreamTest import org.assertj.core.api.Assertions import org.scalatest.time.Span diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateAndDeleteTagProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateAndDeleteTagProcedureTest.scala index 4a4c7ae215df..605f80e27ad3 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateAndDeleteTagProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateAndDeleteTagProcedureTest.scala @@ -21,7 +21,7 @@ package org.apache.paimon.spark.procedure import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.execution.streaming.runtime.MemoryStream import org.apache.spark.sql.streaming.StreamTest class CreateAndDeleteTagProcedureTest extends PaimonSparkTestBase with StreamTest { diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateTagFromTimestampProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateTagFromTimestampProcedureTest.scala index e9b00298e492..b4f7d63086ae 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateTagFromTimestampProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateTagFromTimestampProcedureTest.scala @@ -22,7 +22,7 @@ import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.paimon.utils.SnapshotNotExistException import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.execution.streaming.runtime.MemoryStream import org.apache.spark.sql.streaming.StreamTest class CreateTagFromTimestampProcedureTest extends PaimonSparkTestBase with StreamTest { diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpirePartitionsProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpirePartitionsProcedureTest.scala index 586f2e6c2d72..c7cdc0f517a7 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpirePartitionsProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpirePartitionsProcedureTest.scala @@ -21,7 +21,7 @@ package org.apache.paimon.spark.procedure import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream +import 
org.apache.spark.sql.execution.streaming.runtime.MemoryStream import org.apache.spark.sql.streaming.StreamTest import org.assertj.core.api.Assertions.assertThatThrownBy diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala index aa65d8b9c38e..bbaf88568e2d 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala @@ -22,7 +22,7 @@ import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.paimon.utils.SnapshotManager import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.execution.streaming.runtime.MemoryStream import org.apache.spark.sql.streaming.StreamTest import org.assertj.core.api.Assertions.{assertThat, assertThatIllegalArgumentException} diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/RollbackProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/RollbackProcedureTest.scala index 66f2d57e02bc..078823c3ef37 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/RollbackProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/RollbackProcedureTest.scala @@ -21,7 +21,7 @@ package org.apache.paimon.spark.procedure import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.execution.streaming.runtime.MemoryStream import org.apache.spark.sql.streaming.StreamTest class RollbackProcedureTest extends PaimonSparkTestBase with StreamTest { From f6b380defb5e4c2103a8f4fc1218dbceb8319746 Mon Sep 17 00:00:00 2001 From: sychen Date: Thu, 25 Dec 2025 11:19:36 +0800 Subject: [PATCH 11/35] style --- .../scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala index 12972b5bf439..202974fd2e41 100644 --- a/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala +++ b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala @@ -37,7 +37,7 @@ import org.apache.spark.sql.catalyst.util.ArrayData import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.datasources._ -import org.apache.spark.sql.execution.streaming.{FileStreamSink,MetadataLogFileIndex} +import org.apache.spark.sql.execution.streaming.{FileStreamSink, MetadataLogFileIndex} import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap From e1645831fee97ad86a3a201335741f042afcdd31 Mon Sep 17 00:00:00 2001 From: sychen Date: Thu, 25 Dec 2025 12:30:16 +0800 Subject: [PATCH 12/35] Revert "MemoryStream" This reverts commit 16dac27a21e5eec640cd4fe0605b43c6cdb0031d. 
--- .../scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala | 2 +- .../src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala | 2 +- .../paimon/spark/procedure/AlterBranchProcedureTest.scala | 2 +- .../org/apache/paimon/spark/procedure/BranchProcedureTest.scala | 2 +- .../paimon/spark/procedure/CompactProcedureTestBase.scala | 2 +- .../spark/procedure/CreateAndDeleteTagProcedureTest.scala | 2 +- .../spark/procedure/CreateTagFromTimestampProcedureTest.scala | 2 +- .../paimon/spark/procedure/ExpirePartitionsProcedureTest.scala | 2 +- .../paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala | 2 +- .../apache/paimon/spark/procedure/RollbackProcedureTest.scala | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala index 9b9393be7118..e103429559ba 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala @@ -19,7 +19,7 @@ package org.apache.paimon.spark import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.runtime.MemoryStream +import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest class PaimonCDCSourceTest extends PaimonSparkTestBase with StreamTest { diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala index 9935288db9a7..c43170d7ba1b 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala @@ -22,7 +22,7 @@ import org.apache.paimon.Snapshot.CommitKind._ import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.runtime.MemoryStream +import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.functions.{col, mean, window} import org.apache.spark.sql.streaming.StreamTest diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/AlterBranchProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/AlterBranchProcedureTest.scala index df1df747897d..316c36c40c56 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/AlterBranchProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/AlterBranchProcedureTest.scala @@ -21,7 +21,7 @@ package org.apache.paimon.spark.procedure import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.runtime.MemoryStream +import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest class AlterBranchProcedureTest extends PaimonSparkTestBase with StreamTest { diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/BranchProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/BranchProcedureTest.scala index 111e604b1ef0..67786a47fe3f 100644 --- 
a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/BranchProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/BranchProcedureTest.scala @@ -21,7 +21,7 @@ package org.apache.paimon.spark.procedure import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.runtime.MemoryStream +import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest class BranchProcedureTest extends PaimonSparkTestBase with StreamTest { diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTestBase.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTestBase.scala index 19f6bc25280e..e89eba2e8599 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTestBase.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTestBase.scala @@ -27,7 +27,7 @@ import org.apache.paimon.table.source.DataSplit import org.apache.spark.scheduler.{SparkListener, SparkListenerStageSubmitted} import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.runtime.MemoryStream +import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest import org.assertj.core.api.Assertions import org.scalatest.time.Span diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateAndDeleteTagProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateAndDeleteTagProcedureTest.scala index 605f80e27ad3..4a4c7ae215df 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateAndDeleteTagProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateAndDeleteTagProcedureTest.scala @@ -21,7 +21,7 @@ package org.apache.paimon.spark.procedure import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.runtime.MemoryStream +import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest class CreateAndDeleteTagProcedureTest extends PaimonSparkTestBase with StreamTest { diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateTagFromTimestampProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateTagFromTimestampProcedureTest.scala index b4f7d63086ae..e9b00298e492 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateTagFromTimestampProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateTagFromTimestampProcedureTest.scala @@ -22,7 +22,7 @@ import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.paimon.utils.SnapshotNotExistException import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.runtime.MemoryStream +import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest class CreateTagFromTimestampProcedureTest extends PaimonSparkTestBase with StreamTest { diff --git 
a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpirePartitionsProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpirePartitionsProcedureTest.scala index c7cdc0f517a7..586f2e6c2d72 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpirePartitionsProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpirePartitionsProcedureTest.scala @@ -21,7 +21,7 @@ package org.apache.paimon.spark.procedure import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.runtime.MemoryStream +import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest import org.assertj.core.api.Assertions.assertThatThrownBy diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala index bbaf88568e2d..aa65d8b9c38e 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala @@ -22,7 +22,7 @@ import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.paimon.utils.SnapshotManager import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.runtime.MemoryStream +import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest import org.assertj.core.api.Assertions.{assertThat, assertThatIllegalArgumentException} diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/RollbackProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/RollbackProcedureTest.scala index 078823c3ef37..66f2d57e02bc 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/RollbackProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/RollbackProcedureTest.scala @@ -21,7 +21,7 @@ package org.apache.paimon.spark.procedure import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.runtime.MemoryStream +import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest class RollbackProcedureTest extends PaimonSparkTestBase with StreamTest { From 43ccba51dfd05c1f86b5cb63162166b39faac368 Mon Sep 17 00:00:00 2001 From: sychen Date: Thu, 25 Dec 2025 16:11:20 +0800 Subject: [PATCH 13/35] compile --- pom.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 2840e12f243d..cd09fbcaea37 100644 --- a/pom.xml +++ b/pom.xml @@ -432,11 +432,11 @@ under the License. 
4.13.1 2.13 ${scala213.version} - 4.1.0 + 4.0.1 paimon-spark4-common_2.13 18.1.0 - 4.1 - 4.1.0 + 4.0 + 4.0.1 From 50bae9ef55042ea4b91c6ca60a5f4231a11b319c Mon Sep 17 00:00:00 2001 From: sychen Date: Thu, 25 Dec 2025 16:36:48 +0800 Subject: [PATCH 14/35] compile --- .../paimon/spark/data/Spark4ArrayData.scala | 31 +++++++++++++ .../paimon/spark/data/Spark4InternalRow.scala | 32 ++++++++++++++ .../data/Spark4InternalRowWithBlob.scala | 2 - .../sql/paimon/shims/MinorVersionShim.scala | 18 ++++++++ .../paimon/spark/data/Spark4ArrayData.scala | 2 +- .../paimon/spark/data/Spark4InternalRow.scala | 2 +- .../data/Spark4InternalRowWithBlob.scala | 43 +++++++++++++++++++ .../sql/paimon/shims/MinorVersionShim.scala | 18 ++++++++ .../sql/paimon/shims/MinorVersionShim.scala | 18 ++++++++ .../spark/sql/paimon/shims/Spark4Shim.scala | 8 ++-- 10 files changed, 166 insertions(+), 8 deletions(-) create mode 100644 paimon-spark/paimon-spark-4.0/src/main/scala/org/apache/paimon/spark/data/Spark4ArrayData.scala create mode 100644 paimon-spark/paimon-spark-4.0/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRow.scala rename paimon-spark/{paimon-spark4-common => paimon-spark-4.0}/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRowWithBlob.scala (93%) rename paimon-spark/{paimon-spark4-common => paimon-spark-4.1}/src/main/scala/org/apache/paimon/spark/data/Spark4ArrayData.scala (94%) rename paimon-spark/{paimon-spark4-common => paimon-spark-4.1}/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRow.scala (94%) create mode 100644 paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRowWithBlob.scala diff --git a/paimon-spark/paimon-spark-4.0/src/main/scala/org/apache/paimon/spark/data/Spark4ArrayData.scala b/paimon-spark/paimon-spark-4.0/src/main/scala/org/apache/paimon/spark/data/Spark4ArrayData.scala new file mode 100644 index 000000000000..d8ba2847ab88 --- /dev/null +++ b/paimon-spark/paimon-spark-4.0/src/main/scala/org/apache/paimon/spark/data/Spark4ArrayData.scala @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.spark.data + +import org.apache.paimon.types.DataType + +import org.apache.spark.unsafe.types.VariantVal + +class Spark4ArrayData(override val elementType: DataType) extends AbstractSparkArrayData { + + override def getVariant(ordinal: Int): VariantVal = { + val v = paimonArray.getVariant(ordinal) + new VariantVal(v.value(), v.metadata()) + } +} diff --git a/paimon-spark/paimon-spark-4.0/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRow.scala b/paimon-spark/paimon-spark-4.0/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRow.scala new file mode 100644 index 000000000000..9ac2766346f9 --- /dev/null +++ b/paimon-spark/paimon-spark-4.0/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRow.scala @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.data + +import org.apache.paimon.spark.AbstractSparkInternalRow +import org.apache.paimon.types.RowType + +import org.apache.spark.unsafe.types.VariantVal + +class Spark4InternalRow(rowType: RowType) extends AbstractSparkInternalRow(rowType) { + + override def getVariant(i: Int): VariantVal = { + val v = row.getVariant(i) + new VariantVal(v.value(), v.metadata()) + } +} diff --git a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRowWithBlob.scala b/paimon-spark/paimon-spark-4.0/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRowWithBlob.scala similarity index 93% rename from paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRowWithBlob.scala rename to paimon-spark/paimon-spark-4.0/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRowWithBlob.scala index 0a208daea292..c52207e43197 100644 --- a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRowWithBlob.scala +++ b/paimon-spark/paimon-spark-4.0/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRowWithBlob.scala @@ -18,12 +18,10 @@ package org.apache.paimon.spark.data -import org.apache.paimon.spark.AbstractSparkInternalRow import org.apache.paimon.types.RowType import org.apache.paimon.utils.InternalRowUtils.copyInternalRow import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.unsafe.types.VariantVal class Spark4InternalRowWithBlob(rowType: RowType, blobFieldIndex: Int, blobAsDescriptor: Boolean) extends Spark4InternalRow(rowType) { diff --git a/paimon-spark/paimon-spark-4.0/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala b/paimon-spark/paimon-spark-4.0/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala index f90297f6235d..2c3ad0c659ec 100644 --- 
a/paimon-spark/paimon-spark-4.0/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala +++ b/paimon-spark/paimon-spark-4.0/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala @@ -18,6 +18,9 @@ package org.apache.spark.sql.paimon.shims +import org.apache.paimon.spark.data.{Spark4ArrayData, Spark4InternalRow, Spark4InternalRowWithBlob, SparkArrayData, SparkInternalRow} +import org.apache.paimon.types.{DataType, RowType} + import org.apache.hadoop.fs.Path import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.expressions.Expression @@ -36,6 +39,21 @@ object MinorVersionShim { MergeRows.Keep(condition, output) } + def createSparkInternalRow(rowType: RowType): SparkInternalRow = { + new Spark4InternalRow(rowType) + } + + def createSparkInternalRowWithBlob( + rowType: RowType, + blobFieldIndex: Int, + blobAsDescriptor: Boolean): SparkInternalRow = { + new Spark4InternalRowWithBlob(rowType, blobFieldIndex, blobAsDescriptor) + } + + def createSparkArrayData(elementType: DataType): SparkArrayData = { + new Spark4ArrayData(elementType) + } + def createFileIndex( options: CaseInsensitiveStringMap, sparkSession: SparkSession, diff --git a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4ArrayData.scala b/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/paimon/spark/data/Spark4ArrayData.scala similarity index 94% rename from paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4ArrayData.scala rename to paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/paimon/spark/data/Spark4ArrayData.scala index af4af1254246..048a2c0c6e43 100644 --- a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4ArrayData.scala +++ b/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/paimon/spark/data/Spark4ArrayData.scala @@ -34,6 +34,6 @@ class Spark4ArrayData(override val elementType: DataType) extends AbstractSparkA } def getGeometry(ordinal: Int): GeometryVal = { - throw new UnsupportedOperationException("GeographyVal is not supported") + throw new UnsupportedOperationException("GeometryVal is not supported") } } diff --git a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRow.scala b/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRow.scala similarity index 94% rename from paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRow.scala rename to paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRow.scala index 79606b90221a..0447b26a3273 100644 --- a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRow.scala +++ b/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRow.scala @@ -35,6 +35,6 @@ class Spark4InternalRow(rowType: RowType) extends AbstractSparkInternalRow(rowTy } def getGeometry(ordinal: Int): GeometryVal = { - throw new UnsupportedOperationException("GeographyVal is not supported") + throw new UnsupportedOperationException("GeometryVal is not supported") } } diff --git a/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRowWithBlob.scala b/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRowWithBlob.scala new file mode 100644 index 000000000000..c52207e43197 --- /dev/null +++ 
b/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRowWithBlob.scala @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.data + +import org.apache.paimon.types.RowType +import org.apache.paimon.utils.InternalRowUtils.copyInternalRow + +import org.apache.spark.sql.catalyst.InternalRow + +class Spark4InternalRowWithBlob(rowType: RowType, blobFieldIndex: Int, blobAsDescriptor: Boolean) + extends Spark4InternalRow(rowType) { + + override def getBinary(ordinal: Int): Array[Byte] = { + if (ordinal == blobFieldIndex) { + if (blobAsDescriptor) { + row.getBlob(ordinal).toDescriptor.serialize() + } else { + row.getBlob(ordinal).toData + } + } else { + super.getBinary(ordinal) + } + } + + override def copy: InternalRow = + SparkInternalRow.create(rowType, blobAsDescriptor).replace(copyInternalRow(row, rowType)) +} diff --git a/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala b/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala index 381e9dfd656e..714abd0a6aa9 100644 --- a/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala +++ b/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala @@ -18,6 +18,9 @@ package org.apache.spark.sql.paimon.shims +import org.apache.paimon.spark.data.{Spark4ArrayData, Spark4InternalRow, Spark4InternalRowWithBlob, SparkArrayData, SparkInternalRow} +import org.apache.paimon.types.{DataType, RowType} + import org.apache.hadoop.fs.Path import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.expressions.Expression @@ -45,6 +48,21 @@ object MinorVersionShim { MergeRows.Keep(ctx, condition, output) } + def createSparkInternalRow(rowType: RowType): SparkInternalRow = { + new Spark4InternalRow(rowType) + } + + def createSparkInternalRowWithBlob( + rowType: RowType, + blobFieldIndex: Int, + blobAsDescriptor: Boolean): SparkInternalRow = { + new Spark4InternalRowWithBlob(rowType, blobFieldIndex, blobAsDescriptor) + } + + def createSparkArrayData(elementType: DataType): SparkArrayData = { + new Spark4ArrayData(elementType) + } + def createFileIndex( options: CaseInsensitiveStringMap, sparkSession: SparkSession, diff --git a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala index 3953d62b3541..a5ff4bb64ce9 100644 --- a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala +++ 
b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala @@ -18,6 +18,9 @@ package org.apache.spark.sql.paimon.shims +import org.apache.paimon.spark.data.{SparkArrayData, SparkInternalRow} +import org.apache.paimon.types.{DataType, RowType} + import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans.logical.MergeRows.Instruction @@ -40,4 +43,19 @@ object MinorVersionShim { null } + def createSparkInternalRow(rowType: RowType): SparkInternalRow = { + null + } + + def createSparkInternalRowWithBlob( + rowType: RowType, + blobFieldIndex: Int, + blobAsDescriptor: Boolean): SparkInternalRow = { + null + } + + def createSparkArrayData(elementType: DataType): SparkArrayData = { + null + } + } diff --git a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala index 6cee9bdd32ac..f4d86bf759ed 100644 --- a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala +++ b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala @@ -21,7 +21,7 @@ package org.apache.spark.sql.paimon.shims import org.apache.paimon.data.variant.{GenericVariant, Variant} import org.apache.paimon.spark.catalyst.analysis.Spark4ResolutionRules import org.apache.paimon.spark.catalyst.parser.extensions.PaimonSpark4SqlExtensionsParser -import org.apache.paimon.spark.data.{Spark4ArrayData, Spark4InternalRow, Spark4InternalRowWithBlob, SparkArrayData, SparkInternalRow} +import org.apache.paimon.spark.data.{SparkArrayData, SparkInternalRow} import org.apache.paimon.types.{DataType, RowType} import org.apache.spark.sql.SparkSession @@ -55,18 +55,18 @@ class Spark4Shim extends SparkShim { } override def createSparkInternalRow(rowType: RowType): SparkInternalRow = { - new Spark4InternalRow(rowType) + MinorVersionShim.createSparkInternalRow(rowType) } override def createSparkInternalRowWithBlob( rowType: RowType, blobFieldIndex: Int, blobAsDescriptor: Boolean): SparkInternalRow = { - new Spark4InternalRowWithBlob(rowType, blobFieldIndex, blobAsDescriptor) + MinorVersionShim.createSparkInternalRowWithBlob(rowType, blobFieldIndex, blobAsDescriptor) } override def createSparkArrayData(elementType: DataType): SparkArrayData = { - new Spark4ArrayData(elementType) + MinorVersionShim.createSparkArrayData(elementType) } override def createTable( From 3450f6c84b15b0901667ad4d8f3df0d1086006cb Mon Sep 17 00:00:00 2001 From: sychen Date: Thu, 25 Dec 2025 17:24:41 +0800 Subject: [PATCH 15/35] compile --- .../spark/sql/paimon/shims/MinorVersionShim.scala | 10 +++++----- pom.xml | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala index a5ff4bb64ce9..551d53cdefff 100644 --- a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala +++ b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap object MinorVersionShim { def createKeep(context: String, condition: Expression, output: 
Seq[Expression]): Instruction = { - null + throw new UnsupportedOperationException("Not implemented") } def createFileIndex( @@ -40,22 +40,22 @@ object MinorVersionShim { paths: Seq[String], userSpecifiedSchema: Option[StructType], partitionSchema: StructType): PartitioningAwareFileIndex = { - null + throw new UnsupportedOperationException("Not implemented") } def createSparkInternalRow(rowType: RowType): SparkInternalRow = { - null + throw new UnsupportedOperationException("Not implemented") } def createSparkInternalRowWithBlob( rowType: RowType, blobFieldIndex: Int, blobAsDescriptor: Boolean): SparkInternalRow = { - null + throw new UnsupportedOperationException("Not implemented") } def createSparkArrayData(elementType: DataType): SparkArrayData = { - null + throw new UnsupportedOperationException("Not implemented") } } diff --git a/pom.xml b/pom.xml index cd09fbcaea37..449bb0d881e6 100644 --- a/pom.xml +++ b/pom.xml @@ -436,7 +436,7 @@ under the License. paimon-spark4-common_2.13 18.1.0 4.0 - 4.0.1 + 4.0.1 From 30462c6e3a1aeb2e60e7d454ca820fe8d0a88a50 Mon Sep 17 00:00:00 2001 From: sychen Date: Thu, 25 Dec 2025 18:23:51 +0800 Subject: [PATCH 16/35] compile --- .../apache/paimon/spark/PaimonSinkTest.scala | 14 +++++------ .../sql/CreateAndDeleteTagProcedureTest.scala | 4 ++-- .../spark/sql/RollbackProcedureTest.scala | 4 ++-- .../sql/paimon/shims/MinorVersionShim.scala | 14 +++++++++-- .../sql/paimon/shims/MinorVersionShim.scala | 16 +++++++++++-- .../spark/sql/paimon/shims/SparkShim.scala | 19 ++++++++++++++- .../paimon/spark/PaimonCDCSourceTest.scala | 4 ++-- .../apache/paimon/spark/PaimonSinkTest.scala | 16 ++++++------- .../procedure/AlterBranchProcedureTest.scala | 4 ++-- .../spark/procedure/BranchProcedureTest.scala | 4 ++-- .../procedure/CompactProcedureTestBase.scala | 14 +++++------ .../CreateAndDeleteTagProcedureTest.scala | 6 ++--- .../CreateTagFromTimestampProcedureTest.scala | 6 ++--- .../ExpirePartitionsProcedureTest.scala | 24 +++++++++---------- .../ExpireSnapshotsProcedureTest.scala | 10 ++++---- .../procedure/RollbackProcedureTest.scala | 6 ++--- .../spark/sql/paimon/shims/Spark3Shim.scala | 15 ++++++++++-- .../sql/paimon/shims/MinorVersionShim.scala | 7 +++++- .../spark/sql/paimon/shims/Spark4Shim.scala | 8 ++++++- 19 files changed, 128 insertions(+), 67 deletions(-) diff --git a/paimon-spark/paimon-spark-3.4/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala b/paimon-spark/paimon-spark-3.4/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala index ab4a9bcd9dbf..f0b8821da644 100644 --- a/paimon-spark/paimon-spark-3.4/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala +++ b/paimon-spark/paimon-spark-3.4/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala @@ -20,8 +20,8 @@ package org.apache.paimon.spark import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.functions.{col, mean, window} +import org.apache.spark.sql.paimon.shims.SparkShimLoader import org.apache.spark.sql.streaming.StreamTest import java.sql.Date @@ -45,7 +45,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") @@ -89,7 +89,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) 
val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") @@ -129,7 +129,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] val stream = inputData.toDS .toDF("uid", "city") .groupBy("city") @@ -173,7 +173,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] intercept[RuntimeException] { inputData .toDF() @@ -196,7 +196,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Long, Int, Double)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Long, Int, Double)] val data = inputData.toDS .toDF("time", "stockId", "price") .selectExpr("CAST(time AS timestamp) AS timestamp", "stockId", "price") @@ -253,7 +253,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { spark.sql("SELECT * FROM T ORDER BY a, b"), Row(1, "2023-08-09") :: Row(2, "2023-08-09") :: Nil) - val inputData = MemoryStream[(Long, Date, Int)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Long, Date, Int)] val stream = inputData .toDS() .toDF("a", "b", "c") diff --git a/paimon-spark/paimon-spark-3.4/src/test/scala/org/apache/paimon/spark/sql/CreateAndDeleteTagProcedureTest.scala b/paimon-spark/paimon-spark-3.4/src/test/scala/org/apache/paimon/spark/sql/CreateAndDeleteTagProcedureTest.scala index 3f59e897ec6c..12f0fb1f8368 100644 --- a/paimon-spark/paimon-spark-3.4/src/test/scala/org/apache/paimon/spark/sql/CreateAndDeleteTagProcedureTest.scala +++ b/paimon-spark/paimon-spark-3.4/src/test/scala/org/apache/paimon/spark/sql/CreateAndDeleteTagProcedureTest.scala @@ -21,7 +21,7 @@ package org.apache.paimon.spark.sql import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.paimon.shims.SparkShimLoader import org.apache.spark.sql.streaming.StreamTest class CreateAndDeleteTagProcedureTest extends PaimonSparkTestBase with StreamTest { @@ -39,7 +39,7 @@ class CreateAndDeleteTagProcedureTest extends PaimonSparkTestBase with StreamTes |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") diff --git a/paimon-spark/paimon-spark-3.4/src/test/scala/org/apache/paimon/spark/sql/RollbackProcedureTest.scala b/paimon-spark/paimon-spark-3.4/src/test/scala/org/apache/paimon/spark/sql/RollbackProcedureTest.scala index 605b4cadf6e8..576ec20f11c5 100644 --- a/paimon-spark/paimon-spark-3.4/src/test/scala/org/apache/paimon/spark/sql/RollbackProcedureTest.scala +++ b/paimon-spark/paimon-spark-3.4/src/test/scala/org/apache/paimon/spark/sql/RollbackProcedureTest.scala @@ -21,7 +21,7 @@ package org.apache.paimon.spark.sql import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} -import 
org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.paimon.shims.SparkShimLoader import org.apache.spark.sql.streaming.StreamTest class RollbackProcedureTest extends PaimonSparkTestBase with StreamTest { @@ -39,7 +39,7 @@ class RollbackProcedureTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") diff --git a/paimon-spark/paimon-spark-4.0/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala b/paimon-spark/paimon-spark-4.0/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala index 2c3ad0c659ec..cf9955d81004 100644 --- a/paimon-spark/paimon-spark-4.0/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala +++ b/paimon-spark/paimon-spark-4.0/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala @@ -22,12 +22,12 @@ import org.apache.paimon.spark.data.{Spark4ArrayData, Spark4InternalRow, Spark4I import org.apache.paimon.types.{DataType, RowType} import org.apache.hadoop.fs.Path -import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.{DataFrame, Dataset, Encoder, SparkSession, SQLContext} import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans.logical.MergeRows import org.apache.spark.sql.catalyst.plans.logical.MergeRows.Instruction import org.apache.spark.sql.execution.datasources._ -import org.apache.spark.sql.execution.streaming.{FileStreamSink, MetadataLogFileIndex} +import org.apache.spark.sql.execution.streaming.{FileStreamSink, MemoryStream, MetadataLogFileIndex, Offset} import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -120,4 +120,14 @@ object MinorVersionShim { } } + def createMemoryStream[A](implicit + encoder: Encoder[A], + sqlContext: SQLContext): MemoryStreamWrapper[A] = { + val stream = MemoryStream[A] + new MemoryStreamWrapper[A] { + override def toDS(): Dataset[A] = stream.toDS() + override def toDF(): DataFrame = stream.toDF() + override def addData(data: A*): Offset = stream.addData(data) + } + } } diff --git a/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala b/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala index 714abd0a6aa9..72fca678acf7 100644 --- a/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala +++ b/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala @@ -22,12 +22,13 @@ import org.apache.paimon.spark.data.{Spark4ArrayData, Spark4InternalRow, Spark4I import org.apache.paimon.types.{DataType, RowType} import org.apache.hadoop.fs.Path -import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.{DataFrame, Dataset, Encoder, SparkSession, SQLContext} import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans.logical.MergeRows import org.apache.spark.sql.catalyst.plans.logical.MergeRows.Instruction import org.apache.spark.sql.execution.datasources._ -import org.apache.spark.sql.execution.streaming.runtime.MetadataLogFileIndex +import org.apache.spark.sql.execution.streaming.Offset +import org.apache.spark.sql.execution.streaming.runtime.{MemoryStream, 
MetadataLogFileIndex} import org.apache.spark.sql.execution.streaming.sinks.FileStreamSink import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -129,4 +130,15 @@ object MinorVersionShim { } } + def createMemoryStream[A](implicit + encoder: Encoder[A], + sqlContext: SQLContext): MemoryStreamWrapper[A] = { + implicit val sparkSession: SparkSession = sqlContext.sparkSession + val stream = MemoryStream[A] + new MemoryStreamWrapper[A] { + override def toDS(): Dataset[A] = stream.toDS() + override def toDF(): DataFrame = stream.toDF() + override def addData(data: A*): Offset = stream.addData(data) + } + } } diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/paimon/shims/SparkShim.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/paimon/shims/SparkShim.scala index 0dd32a615a52..8beaafffd310 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/paimon/shims/SparkShim.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/paimon/shims/SparkShim.scala @@ -22,7 +22,7 @@ import org.apache.paimon.data.variant.Variant import org.apache.paimon.spark.data.{SparkArrayData, SparkInternalRow} import org.apache.paimon.types.{DataType, RowType} -import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.{DataFrame, Dataset, Encoder, SparkSession, SQLContext} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression @@ -34,6 +34,7 @@ import org.apache.spark.sql.catalyst.util.ArrayData import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex +import org.apache.spark.sql.execution.streaming.Offset import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -110,4 +111,20 @@ trait SparkShim { paths: Seq[String], userSpecifiedSchema: Option[StructType], partitionSchema: StructType): PartitioningAwareFileIndex + + /** + * Creates a MemoryStream wrapper for streaming tests. In Spark 4.1+, MemoryStream was moved from + * `org.apache.spark.sql.execution.streaming` to + * `org.apache.spark.sql.execution.streaming.runtime`. + */ + def createMemoryStream[A](implicit + encoder: Encoder[A], + sqlContext: SQLContext): MemoryStreamWrapper[A] +} + +/** A wrapper trait for MemoryStream to abstract away Spark version differences. 
*/ +trait MemoryStreamWrapper[A] { + def toDS(): Dataset[A] + def toDF(): DataFrame + def addData(data: A*): Offset } diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala index e103429559ba..eebafcf3e03c 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala @@ -19,7 +19,7 @@ package org.apache.paimon.spark import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.paimon.shims.SparkShimLoader import org.apache.spark.sql.streaming.StreamTest class PaimonCDCSourceTest extends PaimonSparkTestBase with StreamTest { @@ -150,7 +150,7 @@ class PaimonCDCSourceTest extends PaimonSparkTestBase with StreamTest { val location = table.location().toString // streaming write - val inputData = MemoryStream[(Int, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] val writeStream = inputData .toDS() .toDF("a", "b") diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala index c43170d7ba1b..d34237fd6060 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala @@ -22,8 +22,8 @@ import org.apache.paimon.Snapshot.CommitKind._ import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.functions.{col, mean, window} +import org.apache.spark.sql.paimon.shims.SparkShimLoader import org.apache.spark.sql.streaming.StreamTest import java.sql.Date @@ -47,7 +47,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") @@ -91,7 +91,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") @@ -131,7 +131,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] val stream = inputData.toDS .toDF("uid", "city") .groupBy("city") @@ -175,7 +175,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] intercept[RuntimeException] { inputData .toDF() @@ -199,7 +199,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Long, Int, Double)] + val inputData 
= SparkShimLoader.shim.createMemoryStream[(Long, Int, Double)] val data = inputData.toDS .toDF("time", "stockId", "price") .selectExpr("CAST(time AS timestamp) AS timestamp", "stockId", "price") @@ -256,7 +256,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { spark.sql("SELECT * FROM T ORDER BY a, b"), Row(1, "2023-08-09") :: Row(2, "2023-08-09") :: Nil) - val inputData = MemoryStream[(Long, Date, Int)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Long, Date, Int)] val stream = inputData .toDS() .toDF("a", "b", "c") @@ -325,7 +325,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { val table = loadTable("T") val location = table.location().toString - val inputData = MemoryStream[(Int, Int)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, Int)] val stream = inputData .toDS() .toDF("a", "b") diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/AlterBranchProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/AlterBranchProcedureTest.scala index 316c36c40c56..c93d94b2f31f 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/AlterBranchProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/AlterBranchProcedureTest.scala @@ -21,7 +21,7 @@ package org.apache.paimon.spark.procedure import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.paimon.shims.SparkShimLoader import org.apache.spark.sql.streaming.StreamTest class AlterBranchProcedureTest extends PaimonSparkTestBase with StreamTest { @@ -37,7 +37,7 @@ class AlterBranchProcedureTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/BranchProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/BranchProcedureTest.scala index 67786a47fe3f..9ef95e3b9454 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/BranchProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/BranchProcedureTest.scala @@ -21,7 +21,7 @@ package org.apache.paimon.spark.procedure import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.paimon.shims.SparkShimLoader import org.apache.spark.sql.streaming.StreamTest class BranchProcedureTest extends PaimonSparkTestBase with StreamTest { @@ -38,7 +38,7 @@ class BranchProcedureTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTestBase.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTestBase.scala index 
e89eba2e8599..fe5cbd2d10bb 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTestBase.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTestBase.scala @@ -27,7 +27,7 @@ import org.apache.paimon.table.source.DataSplit import org.apache.spark.scheduler.{SparkListener, SparkListenerStageSubmitted} import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.paimon.shims.SparkShimLoader import org.apache.spark.sql.streaming.StreamTest import org.assertj.core.api.Assertions import org.scalatest.time.Span @@ -102,7 +102,7 @@ abstract class CompactProcedureTestBase extends PaimonSparkTestBase with StreamT |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, Int)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, Int)] val stream = inputData .toDS() .toDF("a", "b") @@ -198,7 +198,7 @@ abstract class CompactProcedureTestBase extends PaimonSparkTestBase with StreamT |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, Int, Int)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, Int, Int)] val stream = inputData .toDS() .toDF("p", "a", "b") @@ -368,7 +368,7 @@ abstract class CompactProcedureTestBase extends PaimonSparkTestBase with StreamT |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, Int)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, Int)] val stream = inputData .toDS() .toDF("a", "b") @@ -822,7 +822,7 @@ abstract class CompactProcedureTestBase extends PaimonSparkTestBase with StreamT |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, Int, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, Int, String)] val stream = inputData .toDS() .toDF("a", "b", "c") @@ -970,7 +970,7 @@ abstract class CompactProcedureTestBase extends PaimonSparkTestBase with StreamT |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, Int, String, Int)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, Int, String, Int)] val stream = inputData .toDS() .toDF("a", "b", "c", "pt") @@ -1184,7 +1184,7 @@ abstract class CompactProcedureTestBase extends PaimonSparkTestBase with StreamT |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, Int, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, Int, String)] val stream = inputData .toDS() .toDF("a", "b", "c") diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateAndDeleteTagProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateAndDeleteTagProcedureTest.scala index 4a4c7ae215df..e4cf7a201e3e 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateAndDeleteTagProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateAndDeleteTagProcedureTest.scala @@ -21,7 +21,7 @@ package org.apache.paimon.spark.procedure import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream +import 
org.apache.spark.sql.paimon.shims.SparkShimLoader import org.apache.spark.sql.streaming.StreamTest class CreateAndDeleteTagProcedureTest extends PaimonSparkTestBase with StreamTest { @@ -39,7 +39,7 @@ class CreateAndDeleteTagProcedureTest extends PaimonSparkTestBase with StreamTes |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") @@ -146,7 +146,7 @@ class CreateAndDeleteTagProcedureTest extends PaimonSparkTestBase with StreamTes |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateTagFromTimestampProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateTagFromTimestampProcedureTest.scala index e9b00298e492..fbbf8d507be3 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateTagFromTimestampProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateTagFromTimestampProcedureTest.scala @@ -22,7 +22,7 @@ import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.paimon.utils.SnapshotNotExistException import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.paimon.shims.SparkShimLoader import org.apache.spark.sql.streaming.StreamTest class CreateTagFromTimestampProcedureTest extends PaimonSparkTestBase with StreamTest { @@ -39,7 +39,7 @@ class CreateTagFromTimestampProcedureTest extends PaimonSparkTestBase with Strea |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") @@ -116,7 +116,7 @@ class CreateTagFromTimestampProcedureTest extends PaimonSparkTestBase with Strea |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpirePartitionsProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpirePartitionsProcedureTest.scala index 586f2e6c2d72..edf340a94b6a 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpirePartitionsProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpirePartitionsProcedureTest.scala @@ -21,7 +21,7 @@ package org.apache.paimon.spark.procedure import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.paimon.shims.SparkShimLoader import org.apache.spark.sql.streaming.StreamTest import org.assertj.core.api.Assertions.assertThatThrownBy @@ -41,7 +41,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = 
loadTable("T").location().toString - val inputData = MemoryStream[(String, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(String, String)] val stream = inputData .toDS() .toDF("k", "pt") @@ -93,7 +93,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(String, String, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(String, String, String)] val stream = inputData .toDS() .toDF("k", "pt", "hm") @@ -162,7 +162,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(String, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(String, String)] val stream = inputData .toDS() .toDF("k", "pt") @@ -218,7 +218,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(String, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(String, String)] val stream = inputData .toDS() .toDF("k", "pt") @@ -286,7 +286,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(String, String, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(String, String, String)] val stream = inputData .toDS() .toDF("k", "pt", "hm") @@ -352,7 +352,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(String, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(String, String)] val stream = inputData .toDS() .toDF("k", "pt") @@ -417,7 +417,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(String, String, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(String, String, String)] val stream = inputData .toDS() .toDF("k", "pt", "hm") @@ -487,7 +487,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(String, String, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(String, String, String)] val stream = inputData .toDS() .toDF("k", "pt", "hm") @@ -565,7 +565,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(String, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(String, String)] val stream = inputData .toDS() .toDF("k", "pt") @@ -634,7 +634,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(String, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(String, String)] val stream = inputData .toDS() .toDF("k", "pt") @@ -701,7 +701,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = 
MemoryStream[(String, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(String, String)] val stream = inputData .toDS() .toDF("k", "pt") diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala index aa65d8b9c38e..666b469314dd 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala @@ -22,7 +22,7 @@ import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.paimon.utils.SnapshotManager import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.paimon.shims.SparkShimLoader import org.apache.spark.sql.streaming.StreamTest import org.assertj.core.api.Assertions.{assertThat, assertThatIllegalArgumentException} @@ -44,7 +44,7 @@ class ExpireSnapshotsProcedureTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") @@ -100,7 +100,7 @@ class ExpireSnapshotsProcedureTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") @@ -175,7 +175,7 @@ class ExpireSnapshotsProcedureTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") @@ -230,7 +230,7 @@ class ExpireSnapshotsProcedureTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/RollbackProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/RollbackProcedureTest.scala index 66f2d57e02bc..721c19566662 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/RollbackProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/RollbackProcedureTest.scala @@ -21,7 +21,7 @@ package org.apache.paimon.spark.procedure import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.paimon.shims.SparkShimLoader import org.apache.spark.sql.streaming.StreamTest class RollbackProcedureTest extends PaimonSparkTestBase with StreamTest { @@ -40,7 +40,7 @@ class RollbackProcedureTest extends PaimonSparkTestBase with StreamTest { val table = loadTable("T") val location = table.location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = 
SparkShimLoader.shim.createMemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") @@ -169,7 +169,7 @@ class RollbackProcedureTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") diff --git a/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala index 202974fd2e41..f86265c0e839 100644 --- a/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala +++ b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala @@ -25,7 +25,7 @@ import org.apache.paimon.spark.data.{Spark3ArrayData, Spark3InternalRow, Spark3I import org.apache.paimon.types.{DataType, RowType} import org.apache.hadoop.fs.Path -import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.{DataFrame, Dataset, Encoder, SparkSession, SQLContext} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression @@ -37,7 +37,7 @@ import org.apache.spark.sql.catalyst.util.ArrayData import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.datasources._ -import org.apache.spark.sql.execution.streaming.{FileStreamSink, MetadataLogFileIndex} +import org.apache.spark.sql.execution.streaming.{FileStreamSink, MemoryStream, MetadataLogFileIndex, Offset} import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -200,4 +200,15 @@ class Spark3Shim extends SparkShim { partitionSchema = partitionSchema) } } + + override def createMemoryStream[A](implicit + encoder: Encoder[A], + sqlContext: SQLContext): MemoryStreamWrapper[A] = { + val stream = MemoryStream[A] + new MemoryStreamWrapper[A] { + override def toDS(): Dataset[A] = stream.toDS() + override def toDF(): DataFrame = stream.toDF() + override def addData(data: A*): Offset = stream.addData(data) + } + } } diff --git a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala index 551d53cdefff..1485f0fa1f8d 100644 --- a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala +++ b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala @@ -21,7 +21,7 @@ package org.apache.spark.sql.paimon.shims import org.apache.paimon.spark.data.{SparkArrayData, SparkInternalRow} import org.apache.paimon.types.{DataType, RowType} -import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.{Encoder, SparkSession, SQLContext} import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans.logical.MergeRows.Instruction import org.apache.spark.sql.execution.datasources._ @@ -58,4 +58,9 @@ object MinorVersionShim { throw new UnsupportedOperationException("Not implemented") } + def createMemoryStream[A](implicit + encoder: 
Encoder[A], + sqlContext: SQLContext): MemoryStreamWrapper[A] = { + throw new UnsupportedOperationException("Not implemented") + } } diff --git a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala index f4d86bf759ed..e6618dade766 100644 --- a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala +++ b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala @@ -24,7 +24,7 @@ import org.apache.paimon.spark.catalyst.parser.extensions.PaimonSpark4SqlExtensi import org.apache.paimon.spark.data.{SparkArrayData, SparkInternalRow} import org.apache.paimon.types.{DataType, RowType} -import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.{Encoder, SparkSession, SQLContext} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression @@ -156,4 +156,10 @@ class Spark4Shim extends SparkShim { userSpecifiedSchema, partitionSchema) } + + override def createMemoryStream[A](implicit + encoder: Encoder[A], + sqlContext: SQLContext): MemoryStreamWrapper[A] = { + MinorVersionShim.createMemoryStream[A] + } } From 8867d5c725bc33bc72f17ed5021a56184cc05147 Mon Sep 17 00:00:00 2001 From: sychen Date: Thu, 25 Dec 2025 18:24:24 +0800 Subject: [PATCH 17/35] test --- pom.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 449bb0d881e6..2840e12f243d 100644 --- a/pom.xml +++ b/pom.xml @@ -432,11 +432,11 @@ under the License. 4.13.1 2.13 ${scala213.version} - 4.0.1 + 4.1.0 paimon-spark4-common_2.13 18.1.0 - 4.0 - 4.0.1 + 4.1 + 4.1.0 From 940a9daf3978152415d99b9f58c82b10af6aca44 Mon Sep 17 00:00:00 2001 From: sychen Date: Thu, 25 Dec 2025 22:13:55 +0800 Subject: [PATCH 18/35] scala 2.13.18 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 2840e12f243d..864bd04a8ee6 100644 --- a/pom.xml +++ b/pom.xml @@ -90,7 +90,7 @@ under the License. 1.20.1 2.12 2.12.18 - 2.13.16 + 2.13.18 ${scala212.version} ${scala212.version} 1.1.8.4 From aa595e529930fa4037f4cbada6fbebf3f1b23cdc Mon Sep 17 00:00:00 2001 From: sychen Date: Fri, 26 Dec 2025 11:21:20 +0800 Subject: [PATCH 19/35] Revert "scala 2.13.18" This reverts commit 940a9daf3978152415d99b9f58c82b10af6aca44. --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 864bd04a8ee6..2840e12f243d 100644 --- a/pom.xml +++ b/pom.xml @@ -90,7 +90,7 @@ under the License. 1.20.1 2.12 2.12.18 - 2.13.18 + 2.13.16 ${scala212.version} ${scala212.version} 1.1.8.4 From b0ed607ad8e319efd65b50ec35459a408fa1d8bb Mon Sep 17 00:00:00 2001 From: sychen Date: Fri, 26 Dec 2025 11:21:20 +0800 Subject: [PATCH 20/35] Revert "test" This reverts commit 8867d5c725bc33bc72f17ed5021a56184cc05147. --- pom.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 2840e12f243d..449bb0d881e6 100644 --- a/pom.xml +++ b/pom.xml @@ -432,11 +432,11 @@ under the License. 
4.13.1 2.13 ${scala213.version} - 4.1.0 + 4.0.1 paimon-spark4-common_2.13 18.1.0 - 4.1 - 4.1.0 + 4.0 + 4.0.1 From 2f0cc83842c872fb27b58cc094e073df21c8b746 Mon Sep 17 00:00:00 2001 From: sychen Date: Fri, 26 Dec 2025 11:21:21 +0800 Subject: [PATCH 21/35] Revert "compile" This reverts commit 30462c6e3a1aeb2e60e7d454ca820fe8d0a88a50. --- .../apache/paimon/spark/PaimonSinkTest.scala | 14 +++++------ .../sql/CreateAndDeleteTagProcedureTest.scala | 4 ++-- .../spark/sql/RollbackProcedureTest.scala | 4 ++-- .../sql/paimon/shims/MinorVersionShim.scala | 14 ++--------- .../sql/paimon/shims/MinorVersionShim.scala | 16 ++----------- .../spark/sql/paimon/shims/SparkShim.scala | 19 +-------------- .../paimon/spark/PaimonCDCSourceTest.scala | 4 ++-- .../apache/paimon/spark/PaimonSinkTest.scala | 16 ++++++------- .../procedure/AlterBranchProcedureTest.scala | 4 ++-- .../spark/procedure/BranchProcedureTest.scala | 4 ++-- .../procedure/CompactProcedureTestBase.scala | 14 +++++------ .../CreateAndDeleteTagProcedureTest.scala | 6 ++--- .../CreateTagFromTimestampProcedureTest.scala | 6 ++--- .../ExpirePartitionsProcedureTest.scala | 24 +++++++++---------- .../ExpireSnapshotsProcedureTest.scala | 10 ++++---- .../procedure/RollbackProcedureTest.scala | 6 ++--- .../spark/sql/paimon/shims/Spark3Shim.scala | 15 ++---------- .../sql/paimon/shims/MinorVersionShim.scala | 7 +----- .../spark/sql/paimon/shims/Spark4Shim.scala | 8 +------ 19 files changed, 67 insertions(+), 128 deletions(-) diff --git a/paimon-spark/paimon-spark-3.4/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala b/paimon-spark/paimon-spark-3.4/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala index f0b8821da644..ab4a9bcd9dbf 100644 --- a/paimon-spark/paimon-spark-3.4/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala +++ b/paimon-spark/paimon-spark-3.4/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala @@ -20,8 +20,8 @@ package org.apache.paimon.spark import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Row} +import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.functions.{col, mean, window} -import org.apache.spark.sql.paimon.shims.SparkShimLoader import org.apache.spark.sql.streaming.StreamTest import java.sql.Date @@ -45,7 +45,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] + val inputData = MemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") @@ -89,7 +89,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] + val inputData = MemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") @@ -129,7 +129,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] + val inputData = MemoryStream[(Int, String)] val stream = inputData.toDS .toDF("uid", "city") .groupBy("city") @@ -173,7 +173,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] + val inputData = MemoryStream[(Int, String)] 
intercept[RuntimeException] { inputData .toDF() @@ -196,7 +196,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Long, Int, Double)] + val inputData = MemoryStream[(Long, Int, Double)] val data = inputData.toDS .toDF("time", "stockId", "price") .selectExpr("CAST(time AS timestamp) AS timestamp", "stockId", "price") @@ -253,7 +253,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { spark.sql("SELECT * FROM T ORDER BY a, b"), Row(1, "2023-08-09") :: Row(2, "2023-08-09") :: Nil) - val inputData = SparkShimLoader.shim.createMemoryStream[(Long, Date, Int)] + val inputData = MemoryStream[(Long, Date, Int)] val stream = inputData .toDS() .toDF("a", "b", "c") diff --git a/paimon-spark/paimon-spark-3.4/src/test/scala/org/apache/paimon/spark/sql/CreateAndDeleteTagProcedureTest.scala b/paimon-spark/paimon-spark-3.4/src/test/scala/org/apache/paimon/spark/sql/CreateAndDeleteTagProcedureTest.scala index 12f0fb1f8368..3f59e897ec6c 100644 --- a/paimon-spark/paimon-spark-3.4/src/test/scala/org/apache/paimon/spark/sql/CreateAndDeleteTagProcedureTest.scala +++ b/paimon-spark/paimon-spark-3.4/src/test/scala/org/apache/paimon/spark/sql/CreateAndDeleteTagProcedureTest.scala @@ -21,7 +21,7 @@ package org.apache.paimon.spark.sql import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.paimon.shims.SparkShimLoader +import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest class CreateAndDeleteTagProcedureTest extends PaimonSparkTestBase with StreamTest { @@ -39,7 +39,7 @@ class CreateAndDeleteTagProcedureTest extends PaimonSparkTestBase with StreamTes |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] + val inputData = MemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") diff --git a/paimon-spark/paimon-spark-3.4/src/test/scala/org/apache/paimon/spark/sql/RollbackProcedureTest.scala b/paimon-spark/paimon-spark-3.4/src/test/scala/org/apache/paimon/spark/sql/RollbackProcedureTest.scala index 576ec20f11c5..605b4cadf6e8 100644 --- a/paimon-spark/paimon-spark-3.4/src/test/scala/org/apache/paimon/spark/sql/RollbackProcedureTest.scala +++ b/paimon-spark/paimon-spark-3.4/src/test/scala/org/apache/paimon/spark/sql/RollbackProcedureTest.scala @@ -21,7 +21,7 @@ package org.apache.paimon.spark.sql import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.paimon.shims.SparkShimLoader +import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest class RollbackProcedureTest extends PaimonSparkTestBase with StreamTest { @@ -39,7 +39,7 @@ class RollbackProcedureTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] + val inputData = MemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") diff --git a/paimon-spark/paimon-spark-4.0/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala b/paimon-spark/paimon-spark-4.0/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala index cf9955d81004..2c3ad0c659ec 100644 --- 
a/paimon-spark/paimon-spark-4.0/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala +++ b/paimon-spark/paimon-spark-4.0/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala @@ -22,12 +22,12 @@ import org.apache.paimon.spark.data.{Spark4ArrayData, Spark4InternalRow, Spark4I import org.apache.paimon.types.{DataType, RowType} import org.apache.hadoop.fs.Path -import org.apache.spark.sql.{DataFrame, Dataset, Encoder, SparkSession, SQLContext} +import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans.logical.MergeRows import org.apache.spark.sql.catalyst.plans.logical.MergeRows.Instruction import org.apache.spark.sql.execution.datasources._ -import org.apache.spark.sql.execution.streaming.{FileStreamSink, MemoryStream, MetadataLogFileIndex, Offset} +import org.apache.spark.sql.execution.streaming.{FileStreamSink, MetadataLogFileIndex} import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -120,14 +120,4 @@ object MinorVersionShim { } } - def createMemoryStream[A](implicit - encoder: Encoder[A], - sqlContext: SQLContext): MemoryStreamWrapper[A] = { - val stream = MemoryStream[A] - new MemoryStreamWrapper[A] { - override def toDS(): Dataset[A] = stream.toDS() - override def toDF(): DataFrame = stream.toDF() - override def addData(data: A*): Offset = stream.addData(data) - } - } } diff --git a/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala b/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala index 72fca678acf7..714abd0a6aa9 100644 --- a/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala +++ b/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala @@ -22,13 +22,12 @@ import org.apache.paimon.spark.data.{Spark4ArrayData, Spark4InternalRow, Spark4I import org.apache.paimon.types.{DataType, RowType} import org.apache.hadoop.fs.Path -import org.apache.spark.sql.{DataFrame, Dataset, Encoder, SparkSession, SQLContext} +import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans.logical.MergeRows import org.apache.spark.sql.catalyst.plans.logical.MergeRows.Instruction import org.apache.spark.sql.execution.datasources._ -import org.apache.spark.sql.execution.streaming.Offset -import org.apache.spark.sql.execution.streaming.runtime.{MemoryStream, MetadataLogFileIndex} +import org.apache.spark.sql.execution.streaming.runtime.MetadataLogFileIndex import org.apache.spark.sql.execution.streaming.sinks.FileStreamSink import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -130,15 +129,4 @@ object MinorVersionShim { } } - def createMemoryStream[A](implicit - encoder: Encoder[A], - sqlContext: SQLContext): MemoryStreamWrapper[A] = { - implicit val sparkSession: SparkSession = sqlContext.sparkSession - val stream = MemoryStream[A] - new MemoryStreamWrapper[A] { - override def toDS(): Dataset[A] = stream.toDS() - override def toDF(): DataFrame = stream.toDF() - override def addData(data: A*): Offset = stream.addData(data) - } - } } diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/paimon/shims/SparkShim.scala 
b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/paimon/shims/SparkShim.scala index 8beaafffd310..0dd32a615a52 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/paimon/shims/SparkShim.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/paimon/shims/SparkShim.scala @@ -22,7 +22,7 @@ import org.apache.paimon.data.variant.Variant import org.apache.paimon.spark.data.{SparkArrayData, SparkInternalRow} import org.apache.paimon.types.{DataType, RowType} -import org.apache.spark.sql.{DataFrame, Dataset, Encoder, SparkSession, SQLContext} +import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression @@ -34,7 +34,6 @@ import org.apache.spark.sql.catalyst.util.ArrayData import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex -import org.apache.spark.sql.execution.streaming.Offset import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -111,20 +110,4 @@ trait SparkShim { paths: Seq[String], userSpecifiedSchema: Option[StructType], partitionSchema: StructType): PartitioningAwareFileIndex - - /** - * Creates a MemoryStream wrapper for streaming tests. In Spark 4.1+, MemoryStream was moved from - * `org.apache.spark.sql.execution.streaming` to - * `org.apache.spark.sql.execution.streaming.runtime`. - */ - def createMemoryStream[A](implicit - encoder: Encoder[A], - sqlContext: SQLContext): MemoryStreamWrapper[A] -} - -/** A wrapper trait for MemoryStream to abstract away Spark version differences. 
*/ -trait MemoryStreamWrapper[A] { - def toDS(): Dataset[A] - def toDF(): DataFrame - def addData(data: A*): Offset } diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala index eebafcf3e03c..e103429559ba 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala @@ -19,7 +19,7 @@ package org.apache.paimon.spark import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.paimon.shims.SparkShimLoader +import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest class PaimonCDCSourceTest extends PaimonSparkTestBase with StreamTest { @@ -150,7 +150,7 @@ class PaimonCDCSourceTest extends PaimonSparkTestBase with StreamTest { val location = table.location().toString // streaming write - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] + val inputData = MemoryStream[(Int, String)] val writeStream = inputData .toDS() .toDF("a", "b") diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala index d34237fd6060..c43170d7ba1b 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala @@ -22,8 +22,8 @@ import org.apache.paimon.Snapshot.CommitKind._ import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Row} +import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.functions.{col, mean, window} -import org.apache.spark.sql.paimon.shims.SparkShimLoader import org.apache.spark.sql.streaming.StreamTest import java.sql.Date @@ -47,7 +47,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] + val inputData = MemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") @@ -91,7 +91,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] + val inputData = MemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") @@ -131,7 +131,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] + val inputData = MemoryStream[(Int, String)] val stream = inputData.toDS .toDF("uid", "city") .groupBy("city") @@ -175,7 +175,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] + val inputData = MemoryStream[(Int, String)] intercept[RuntimeException] { inputData .toDF() @@ -199,7 +199,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Long, 
Int, Double)] + val inputData = MemoryStream[(Long, Int, Double)] val data = inputData.toDS .toDF("time", "stockId", "price") .selectExpr("CAST(time AS timestamp) AS timestamp", "stockId", "price") @@ -256,7 +256,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { spark.sql("SELECT * FROM T ORDER BY a, b"), Row(1, "2023-08-09") :: Row(2, "2023-08-09") :: Nil) - val inputData = SparkShimLoader.shim.createMemoryStream[(Long, Date, Int)] + val inputData = MemoryStream[(Long, Date, Int)] val stream = inputData .toDS() .toDF("a", "b", "c") @@ -325,7 +325,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { val table = loadTable("T") val location = table.location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, Int)] + val inputData = MemoryStream[(Int, Int)] val stream = inputData .toDS() .toDF("a", "b") diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/AlterBranchProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/AlterBranchProcedureTest.scala index c93d94b2f31f..316c36c40c56 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/AlterBranchProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/AlterBranchProcedureTest.scala @@ -21,7 +21,7 @@ package org.apache.paimon.spark.procedure import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.paimon.shims.SparkShimLoader +import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest class AlterBranchProcedureTest extends PaimonSparkTestBase with StreamTest { @@ -37,7 +37,7 @@ class AlterBranchProcedureTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] + val inputData = MemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/BranchProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/BranchProcedureTest.scala index 9ef95e3b9454..67786a47fe3f 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/BranchProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/BranchProcedureTest.scala @@ -21,7 +21,7 @@ package org.apache.paimon.spark.procedure import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.paimon.shims.SparkShimLoader +import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest class BranchProcedureTest extends PaimonSparkTestBase with StreamTest { @@ -38,7 +38,7 @@ class BranchProcedureTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] + val inputData = MemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTestBase.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTestBase.scala index 
fe5cbd2d10bb..e89eba2e8599 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTestBase.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTestBase.scala @@ -27,7 +27,7 @@ import org.apache.paimon.table.source.DataSplit import org.apache.spark.scheduler.{SparkListener, SparkListenerStageSubmitted} import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.paimon.shims.SparkShimLoader +import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest import org.assertj.core.api.Assertions import org.scalatest.time.Span @@ -102,7 +102,7 @@ abstract class CompactProcedureTestBase extends PaimonSparkTestBase with StreamT |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, Int)] + val inputData = MemoryStream[(Int, Int)] val stream = inputData .toDS() .toDF("a", "b") @@ -198,7 +198,7 @@ abstract class CompactProcedureTestBase extends PaimonSparkTestBase with StreamT |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, Int, Int)] + val inputData = MemoryStream[(Int, Int, Int)] val stream = inputData .toDS() .toDF("p", "a", "b") @@ -368,7 +368,7 @@ abstract class CompactProcedureTestBase extends PaimonSparkTestBase with StreamT |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, Int)] + val inputData = MemoryStream[(Int, Int)] val stream = inputData .toDS() .toDF("a", "b") @@ -822,7 +822,7 @@ abstract class CompactProcedureTestBase extends PaimonSparkTestBase with StreamT |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, Int, String)] + val inputData = MemoryStream[(Int, Int, String)] val stream = inputData .toDS() .toDF("a", "b", "c") @@ -970,7 +970,7 @@ abstract class CompactProcedureTestBase extends PaimonSparkTestBase with StreamT |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, Int, String, Int)] + val inputData = MemoryStream[(Int, Int, String, Int)] val stream = inputData .toDS() .toDF("a", "b", "c", "pt") @@ -1184,7 +1184,7 @@ abstract class CompactProcedureTestBase extends PaimonSparkTestBase with StreamT |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, Int, String)] + val inputData = MemoryStream[(Int, Int, String)] val stream = inputData .toDS() .toDF("a", "b", "c") diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateAndDeleteTagProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateAndDeleteTagProcedureTest.scala index e4cf7a201e3e..4a4c7ae215df 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateAndDeleteTagProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateAndDeleteTagProcedureTest.scala @@ -21,7 +21,7 @@ package org.apache.paimon.spark.procedure import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.paimon.shims.SparkShimLoader +import 
org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest class CreateAndDeleteTagProcedureTest extends PaimonSparkTestBase with StreamTest { @@ -39,7 +39,7 @@ class CreateAndDeleteTagProcedureTest extends PaimonSparkTestBase with StreamTes |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] + val inputData = MemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") @@ -146,7 +146,7 @@ class CreateAndDeleteTagProcedureTest extends PaimonSparkTestBase with StreamTes |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] + val inputData = MemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateTagFromTimestampProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateTagFromTimestampProcedureTest.scala index fbbf8d507be3..e9b00298e492 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateTagFromTimestampProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateTagFromTimestampProcedureTest.scala @@ -22,7 +22,7 @@ import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.paimon.utils.SnapshotNotExistException import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.paimon.shims.SparkShimLoader +import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest class CreateTagFromTimestampProcedureTest extends PaimonSparkTestBase with StreamTest { @@ -39,7 +39,7 @@ class CreateTagFromTimestampProcedureTest extends PaimonSparkTestBase with Strea |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] + val inputData = MemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") @@ -116,7 +116,7 @@ class CreateTagFromTimestampProcedureTest extends PaimonSparkTestBase with Strea |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] + val inputData = MemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpirePartitionsProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpirePartitionsProcedureTest.scala index edf340a94b6a..586f2e6c2d72 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpirePartitionsProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpirePartitionsProcedureTest.scala @@ -21,7 +21,7 @@ package org.apache.paimon.spark.procedure import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.paimon.shims.SparkShimLoader +import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest import org.assertj.core.api.Assertions.assertThatThrownBy @@ -41,7 +41,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = 
loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(String, String)] + val inputData = MemoryStream[(String, String)] val stream = inputData .toDS() .toDF("k", "pt") @@ -93,7 +93,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(String, String, String)] + val inputData = MemoryStream[(String, String, String)] val stream = inputData .toDS() .toDF("k", "pt", "hm") @@ -162,7 +162,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(String, String)] + val inputData = MemoryStream[(String, String)] val stream = inputData .toDS() .toDF("k", "pt") @@ -218,7 +218,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(String, String)] + val inputData = MemoryStream[(String, String)] val stream = inputData .toDS() .toDF("k", "pt") @@ -286,7 +286,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(String, String, String)] + val inputData = MemoryStream[(String, String, String)] val stream = inputData .toDS() .toDF("k", "pt", "hm") @@ -352,7 +352,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(String, String)] + val inputData = MemoryStream[(String, String)] val stream = inputData .toDS() .toDF("k", "pt") @@ -417,7 +417,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(String, String, String)] + val inputData = MemoryStream[(String, String, String)] val stream = inputData .toDS() .toDF("k", "pt", "hm") @@ -487,7 +487,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(String, String, String)] + val inputData = MemoryStream[(String, String, String)] val stream = inputData .toDS() .toDF("k", "pt", "hm") @@ -565,7 +565,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(String, String)] + val inputData = MemoryStream[(String, String)] val stream = inputData .toDS() .toDF("k", "pt") @@ -634,7 +634,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(String, String)] + val inputData = MemoryStream[(String, String)] val stream = inputData .toDS() .toDF("k", "pt") @@ -701,7 +701,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = 
SparkShimLoader.shim.createMemoryStream[(String, String)] + val inputData = MemoryStream[(String, String)] val stream = inputData .toDS() .toDF("k", "pt") diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala index 666b469314dd..aa65d8b9c38e 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala @@ -22,7 +22,7 @@ import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.paimon.utils.SnapshotManager import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.paimon.shims.SparkShimLoader +import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest import org.assertj.core.api.Assertions.{assertThat, assertThatIllegalArgumentException} @@ -44,7 +44,7 @@ class ExpireSnapshotsProcedureTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] + val inputData = MemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") @@ -100,7 +100,7 @@ class ExpireSnapshotsProcedureTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] + val inputData = MemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") @@ -175,7 +175,7 @@ class ExpireSnapshotsProcedureTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] + val inputData = MemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") @@ -230,7 +230,7 @@ class ExpireSnapshotsProcedureTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] + val inputData = MemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/RollbackProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/RollbackProcedureTest.scala index 721c19566662..66f2d57e02bc 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/RollbackProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/RollbackProcedureTest.scala @@ -21,7 +21,7 @@ package org.apache.paimon.spark.procedure import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.paimon.shims.SparkShimLoader +import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest class RollbackProcedureTest extends PaimonSparkTestBase with StreamTest { @@ -40,7 +40,7 @@ class RollbackProcedureTest extends PaimonSparkTestBase with StreamTest { val table = loadTable("T") val location = table.location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] + val 
inputData = MemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") @@ -169,7 +169,7 @@ class RollbackProcedureTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = SparkShimLoader.shim.createMemoryStream[(Int, String)] + val inputData = MemoryStream[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") diff --git a/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala index f86265c0e839..202974fd2e41 100644 --- a/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala +++ b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala @@ -25,7 +25,7 @@ import org.apache.paimon.spark.data.{Spark3ArrayData, Spark3InternalRow, Spark3I import org.apache.paimon.types.{DataType, RowType} import org.apache.hadoop.fs.Path -import org.apache.spark.sql.{DataFrame, Dataset, Encoder, SparkSession, SQLContext} +import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression @@ -37,7 +37,7 @@ import org.apache.spark.sql.catalyst.util.ArrayData import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.datasources._ -import org.apache.spark.sql.execution.streaming.{FileStreamSink, MemoryStream, MetadataLogFileIndex, Offset} +import org.apache.spark.sql.execution.streaming.{FileStreamSink, MetadataLogFileIndex} import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -200,15 +200,4 @@ class Spark3Shim extends SparkShim { partitionSchema = partitionSchema) } } - - override def createMemoryStream[A](implicit - encoder: Encoder[A], - sqlContext: SQLContext): MemoryStreamWrapper[A] = { - val stream = MemoryStream[A] - new MemoryStreamWrapper[A] { - override def toDS(): Dataset[A] = stream.toDS() - override def toDF(): DataFrame = stream.toDF() - override def addData(data: A*): Offset = stream.addData(data) - } - } } diff --git a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala index 1485f0fa1f8d..551d53cdefff 100644 --- a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala +++ b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala @@ -21,7 +21,7 @@ package org.apache.spark.sql.paimon.shims import org.apache.paimon.spark.data.{SparkArrayData, SparkInternalRow} import org.apache.paimon.types.{DataType, RowType} -import org.apache.spark.sql.{Encoder, SparkSession, SQLContext} +import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans.logical.MergeRows.Instruction import org.apache.spark.sql.execution.datasources._ @@ -58,9 +58,4 @@ object MinorVersionShim { throw new UnsupportedOperationException("Not implemented") } - def createMemoryStream[A](implicit - encoder: Encoder[A], - 
sqlContext: SQLContext): MemoryStreamWrapper[A] = { - throw new UnsupportedOperationException("Not implemented") - } } diff --git a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala index e6618dade766..f4d86bf759ed 100644 --- a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala +++ b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala @@ -24,7 +24,7 @@ import org.apache.paimon.spark.catalyst.parser.extensions.PaimonSpark4SqlExtensi import org.apache.paimon.spark.data.{SparkArrayData, SparkInternalRow} import org.apache.paimon.types.{DataType, RowType} -import org.apache.spark.sql.{Encoder, SparkSession, SQLContext} +import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression @@ -156,10 +156,4 @@ class Spark4Shim extends SparkShim { userSpecifiedSchema, partitionSchema) } - - override def createMemoryStream[A](implicit - encoder: Encoder[A], - sqlContext: SQLContext): MemoryStreamWrapper[A] = { - MinorVersionShim.createMemoryStream[A] - } } From 104447eeba4a4231b13fe48abf7a02f8aa901a3a Mon Sep 17 00:00:00 2001 From: sychen Date: Fri, 26 Dec 2025 13:54:00 +0800 Subject: [PATCH 22/35] paimon-spark-ut --- .../paimon/spark/MemoryStreamWrapper.scala | 85 +++++++++++++++++++ .../paimon/spark/PaimonCDCSourceTest.scala | 5 +- .../apache/paimon/spark/PaimonSinkTest.scala | 16 ++-- .../procedure/AlterBranchProcedureTest.scala | 4 +- .../spark/procedure/BranchProcedureTest.scala | 4 +- .../procedure/CompactProcedureTestBase.scala | 14 +-- .../CreateAndDeleteTagProcedureTest.scala | 6 +- .../CreateTagFromTimestampProcedureTest.scala | 6 +- .../ExpirePartitionsProcedureTest.scala | 24 +++--- .../ExpireSnapshotsProcedureTest.scala | 10 +-- .../procedure/RollbackProcedureTest.scala | 6 +- pom.xml | 8 +- 12 files changed, 137 insertions(+), 51 deletions(-) create mode 100644 paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/MemoryStreamWrapper.scala diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/MemoryStreamWrapper.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/MemoryStreamWrapper.scala new file mode 100644 index 000000000000..9e2566d93dc3 --- /dev/null +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/MemoryStreamWrapper.scala @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.spark + +import org.apache.spark.sql.{DataFrame, Dataset, Encoder, SQLContext} +import org.apache.spark.sql.execution.streaming.Offset + +import scala.util.Try + +/** + * A wrapper for MemoryStream to handle Spark version compatibility. In Spark 4.1+, MemoryStream was + * moved from `org.apache.spark.sql.execution.streaming` to + * `org.apache.spark.sql.execution.streaming.runtime`. + */ +class MemoryStreamWrapper[A] private (stream: AnyRef) { + + private val streamClass = stream.getClass + + def toDS(): Dataset[A] = { + streamClass.getMethod("toDS").invoke(stream).asInstanceOf[Dataset[A]] + } + + def toDF(): DataFrame = { + streamClass.getMethod("toDF").invoke(stream).asInstanceOf[DataFrame] + } + + def addData(data: A*): Offset = { + val method = streamClass.getMethod("addData", classOf[TraversableOnce[_]]) + method.invoke(stream, data).asInstanceOf[Offset] + } +} + +object MemoryStreamWrapper { + + /** Creates a MemoryStream wrapper that works across different Spark versions. */ + def apply[A](implicit encoder: Encoder[A], sqlContext: SQLContext): MemoryStreamWrapper[A] = { + val stream = createMemoryStream[A] + new MemoryStreamWrapper[A](stream) + } + + private def createMemoryStream[A](implicit + encoder: Encoder[A], + sqlContext: SQLContext): AnyRef = { + // Try Spark 4.1+ path first (runtime package) + val spark41Class = Try( + Class.forName("org.apache.spark.sql.execution.streaming.runtime.MemoryStream$")) + if (spark41Class.isSuccess) { + val companion = spark41Class.get.getField("MODULE$").get(null) + // Spark 4.1+ uses implicit SparkSession instead of SQLContext + val applyMethod = companion.getClass.getMethod( + "apply", + classOf[Encoder[_]], + classOf[org.apache.spark.sql.SparkSession] + ) + return applyMethod.invoke(companion, encoder, sqlContext.sparkSession).asInstanceOf[AnyRef] + } + + // Fallback to Spark 3.x / 4.0 path + val oldClass = + Class.forName("org.apache.spark.sql.execution.streaming.MemoryStream$") + val companion = oldClass.getField("MODULE$").get(null) + val applyMethod = companion.getClass.getMethod( + "apply", + classOf[Encoder[_]], + classOf[SQLContext] + ) + applyMethod.invoke(companion, encoder, sqlContext).asInstanceOf[AnyRef] + } +} diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala index e103429559ba..6300600a820b 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala @@ -18,8 +18,9 @@ package org.apache.paimon.spark +import org.apache.paimon.spark.MemoryStreamWrapper + import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest class PaimonCDCSourceTest extends PaimonSparkTestBase with StreamTest { @@ -150,7 +151,7 @@ class PaimonCDCSourceTest extends PaimonSparkTestBase with StreamTest { val location = table.location().toString // streaming write - val inputData = MemoryStream[(Int, String)] + val inputData = MemoryStreamWrapper[(Int, String)] val writeStream = inputData .toDS() .toDF("a", "b") diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala index c43170d7ba1b..3c92b7eed9d3 100644 
--- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala @@ -19,10 +19,10 @@ package org.apache.paimon.spark import org.apache.paimon.Snapshot.CommitKind._ +import org.apache.paimon.spark.MemoryStreamWrapper import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.functions.{col, mean, window} import org.apache.spark.sql.streaming.StreamTest @@ -47,7 +47,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = MemoryStreamWrapper[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") @@ -91,7 +91,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = MemoryStreamWrapper[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") @@ -131,7 +131,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = MemoryStreamWrapper[(Int, String)] val stream = inputData.toDS .toDF("uid", "city") .groupBy("city") @@ -175,7 +175,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = MemoryStreamWrapper[(Int, String)] intercept[RuntimeException] { inputData .toDF() @@ -199,7 +199,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Long, Int, Double)] + val inputData = MemoryStreamWrapper[(Long, Int, Double)] val data = inputData.toDS .toDF("time", "stockId", "price") .selectExpr("CAST(time AS timestamp) AS timestamp", "stockId", "price") @@ -256,7 +256,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { spark.sql("SELECT * FROM T ORDER BY a, b"), Row(1, "2023-08-09") :: Row(2, "2023-08-09") :: Nil) - val inputData = MemoryStream[(Long, Date, Int)] + val inputData = MemoryStreamWrapper[(Long, Date, Int)] val stream = inputData .toDS() .toDF("a", "b", "c") @@ -325,7 +325,7 @@ class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { val table = loadTable("T") val location = table.location().toString - val inputData = MemoryStream[(Int, Int)] + val inputData = MemoryStreamWrapper[(Int, Int)] val stream = inputData .toDS() .toDF("a", "b") diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/AlterBranchProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/AlterBranchProcedureTest.scala index 316c36c40c56..59b5c8fd1cb5 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/AlterBranchProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/AlterBranchProcedureTest.scala @@ -18,10 +18,10 @@ package org.apache.paimon.spark.procedure +import org.apache.paimon.spark.MemoryStreamWrapper import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} 
-import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest class AlterBranchProcedureTest extends PaimonSparkTestBase with StreamTest { @@ -37,7 +37,7 @@ class AlterBranchProcedureTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = MemoryStreamWrapper[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/BranchProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/BranchProcedureTest.scala index 67786a47fe3f..4b866875eceb 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/BranchProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/BranchProcedureTest.scala @@ -18,10 +18,10 @@ package org.apache.paimon.spark.procedure +import org.apache.paimon.spark.MemoryStreamWrapper import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest class BranchProcedureTest extends PaimonSparkTestBase with StreamTest { @@ -38,7 +38,7 @@ class BranchProcedureTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = MemoryStreamWrapper[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTestBase.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTestBase.scala index e89eba2e8599..825f12b997cd 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTestBase.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTestBase.scala @@ -20,6 +20,7 @@ package org.apache.paimon.spark.procedure import org.apache.paimon.Snapshot.CommitKind import org.apache.paimon.fs.Path +import org.apache.paimon.spark.MemoryStreamWrapper import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.paimon.spark.utils.SparkProcedureUtils import org.apache.paimon.table.FileStoreTable @@ -27,7 +28,6 @@ import org.apache.paimon.table.source.DataSplit import org.apache.spark.scheduler.{SparkListener, SparkListenerStageSubmitted} import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest import org.assertj.core.api.Assertions import org.scalatest.time.Span @@ -102,7 +102,7 @@ abstract class CompactProcedureTestBase extends PaimonSparkTestBase with StreamT |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, Int)] + val inputData = MemoryStreamWrapper[(Int, Int)] val stream = inputData .toDS() .toDF("a", "b") @@ -198,7 +198,7 @@ abstract class CompactProcedureTestBase extends PaimonSparkTestBase with StreamT |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, Int, Int)] + val inputData = MemoryStreamWrapper[(Int, Int, Int)] val stream = inputData .toDS() .toDF("p", "a", "b") @@ -368,7 +368,7 @@ abstract 
class CompactProcedureTestBase extends PaimonSparkTestBase with StreamT |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, Int)] + val inputData = MemoryStreamWrapper[(Int, Int)] val stream = inputData .toDS() .toDF("a", "b") @@ -822,7 +822,7 @@ abstract class CompactProcedureTestBase extends PaimonSparkTestBase with StreamT |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, Int, String)] + val inputData = MemoryStreamWrapper[(Int, Int, String)] val stream = inputData .toDS() .toDF("a", "b", "c") @@ -970,7 +970,7 @@ abstract class CompactProcedureTestBase extends PaimonSparkTestBase with StreamT |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, Int, String, Int)] + val inputData = MemoryStreamWrapper[(Int, Int, String, Int)] val stream = inputData .toDS() .toDF("a", "b", "c", "pt") @@ -1184,7 +1184,7 @@ abstract class CompactProcedureTestBase extends PaimonSparkTestBase with StreamT |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, Int, String)] + val inputData = MemoryStreamWrapper[(Int, Int, String)] val stream = inputData .toDS() .toDF("a", "b", "c") diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateAndDeleteTagProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateAndDeleteTagProcedureTest.scala index 4a4c7ae215df..bcb53faf957b 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateAndDeleteTagProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateAndDeleteTagProcedureTest.scala @@ -18,10 +18,10 @@ package org.apache.paimon.spark.procedure +import org.apache.paimon.spark.MemoryStreamWrapper import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest class CreateAndDeleteTagProcedureTest extends PaimonSparkTestBase with StreamTest { @@ -39,7 +39,7 @@ class CreateAndDeleteTagProcedureTest extends PaimonSparkTestBase with StreamTes |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = MemoryStreamWrapper[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") @@ -146,7 +146,7 @@ class CreateAndDeleteTagProcedureTest extends PaimonSparkTestBase with StreamTes |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = MemoryStreamWrapper[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateTagFromTimestampProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateTagFromTimestampProcedureTest.scala index e9b00298e492..2bc8fdbb3101 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateTagFromTimestampProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateTagFromTimestampProcedureTest.scala @@ -18,11 +18,11 @@ package org.apache.paimon.spark.procedure +import org.apache.paimon.spark.MemoryStreamWrapper import 
org.apache.paimon.spark.PaimonSparkTestBase import org.apache.paimon.utils.SnapshotNotExistException import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest class CreateTagFromTimestampProcedureTest extends PaimonSparkTestBase with StreamTest { @@ -39,7 +39,7 @@ class CreateTagFromTimestampProcedureTest extends PaimonSparkTestBase with Strea |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = MemoryStreamWrapper[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") @@ -116,7 +116,7 @@ class CreateTagFromTimestampProcedureTest extends PaimonSparkTestBase with Strea |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = MemoryStreamWrapper[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpirePartitionsProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpirePartitionsProcedureTest.scala index 586f2e6c2d72..1d2bd0981e72 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpirePartitionsProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpirePartitionsProcedureTest.scala @@ -18,10 +18,10 @@ package org.apache.paimon.spark.procedure +import org.apache.paimon.spark.MemoryStreamWrapper import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest import org.assertj.core.api.Assertions.assertThatThrownBy @@ -41,7 +41,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(String, String)] + val inputData = MemoryStreamWrapper[(String, String)] val stream = inputData .toDS() .toDF("k", "pt") @@ -93,7 +93,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(String, String, String)] + val inputData = MemoryStreamWrapper[(String, String, String)] val stream = inputData .toDS() .toDF("k", "pt", "hm") @@ -162,7 +162,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(String, String)] + val inputData = MemoryStreamWrapper[(String, String)] val stream = inputData .toDS() .toDF("k", "pt") @@ -218,7 +218,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(String, String)] + val inputData = MemoryStreamWrapper[(String, String)] val stream = inputData .toDS() .toDF("k", "pt") @@ -286,7 +286,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(String, String, String)] + val inputData = MemoryStreamWrapper[(String, String, String)] val stream = inputData .toDS() .toDF("k", "pt", "hm") @@ -352,7 +352,7 @@ class 
ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(String, String)] + val inputData = MemoryStreamWrapper[(String, String)] val stream = inputData .toDS() .toDF("k", "pt") @@ -417,7 +417,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(String, String, String)] + val inputData = MemoryStreamWrapper[(String, String, String)] val stream = inputData .toDS() .toDF("k", "pt", "hm") @@ -487,7 +487,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(String, String, String)] + val inputData = MemoryStreamWrapper[(String, String, String)] val stream = inputData .toDS() .toDF("k", "pt", "hm") @@ -565,7 +565,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(String, String)] + val inputData = MemoryStreamWrapper[(String, String)] val stream = inputData .toDS() .toDF("k", "pt") @@ -634,7 +634,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(String, String)] + val inputData = MemoryStreamWrapper[(String, String)] val stream = inputData .toDS() .toDF("k", "pt") @@ -701,7 +701,7 @@ class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(String, String)] + val inputData = MemoryStreamWrapper[(String, String)] val stream = inputData .toDS() .toDF("k", "pt") diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala index aa65d8b9c38e..f1e3f2f14859 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala @@ -18,11 +18,11 @@ package org.apache.paimon.spark.procedure +import org.apache.paimon.spark.MemoryStreamWrapper import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.paimon.utils.SnapshotManager import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest import org.assertj.core.api.Assertions.{assertThat, assertThatIllegalArgumentException} @@ -44,7 +44,7 @@ class ExpireSnapshotsProcedureTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = MemoryStreamWrapper[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") @@ -100,7 +100,7 @@ class ExpireSnapshotsProcedureTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = MemoryStreamWrapper[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") @@ -175,7 
+175,7 @@ class ExpireSnapshotsProcedureTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = MemoryStreamWrapper[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") @@ -230,7 +230,7 @@ class ExpireSnapshotsProcedureTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = MemoryStreamWrapper[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/RollbackProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/RollbackProcedureTest.scala index 66f2d57e02bc..9fc0182b5dee 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/RollbackProcedureTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/RollbackProcedureTest.scala @@ -18,10 +18,10 @@ package org.apache.paimon.spark.procedure +import org.apache.paimon.spark.MemoryStreamWrapper import org.apache.paimon.spark.PaimonSparkTestBase import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.streaming.StreamTest class RollbackProcedureTest extends PaimonSparkTestBase with StreamTest { @@ -40,7 +40,7 @@ class RollbackProcedureTest extends PaimonSparkTestBase with StreamTest { val table = loadTable("T") val location = table.location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = MemoryStreamWrapper[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") @@ -169,7 +169,7 @@ class RollbackProcedureTest extends PaimonSparkTestBase with StreamTest { |""".stripMargin) val location = loadTable("T").location().toString - val inputData = MemoryStream[(Int, String)] + val inputData = MemoryStreamWrapper[(Int, String)] val stream = inputData .toDS() .toDF("a", "b") diff --git a/pom.xml b/pom.xml index 449bb0d881e6..9270bc5a26bb 100644 --- a/pom.xml +++ b/pom.xml @@ -90,7 +90,7 @@ under the License. 1.20.1 2.12 2.12.18 - 2.13.16 + 2.13.17 ${scala212.version} ${scala212.version} 1.1.8.4 @@ -432,11 +432,11 @@ under the License. 
4.13.1 2.13 ${scala213.version} - 4.0.1 + 4.1.0 paimon-spark4-common_2.13 18.1.0 - 4.0 - 4.0.1 + 4.1 + 4.1.0 From c2ecd3698d0ac4c1c7a497be43e9f1927162dd8c Mon Sep 17 00:00:00 2001 From: sychen Date: Fri, 26 Dec 2025 14:41:47 +0800 Subject: [PATCH 23/35] compile --- .../sql/paimon/shims/MinorVersionShim.scala | 132 ------------------ .../paimon/spark/data/Spark4ArrayData.scala | 0 .../paimon/spark/data/Spark4InternalRow.scala | 0 .../data/Spark4InternalRowWithBlob.scala | 0 .../sql/paimon/shims/MinorVersionShim.scala | 97 +++++++++++-- 5 files changed, 84 insertions(+), 145 deletions(-) delete mode 100644 paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala rename paimon-spark/{paimon-spark-4.1 => paimon-spark4-common}/src/main/scala/org/apache/paimon/spark/data/Spark4ArrayData.scala (100%) rename paimon-spark/{paimon-spark-4.1 => paimon-spark4-common}/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRow.scala (100%) rename paimon-spark/{paimon-spark-4.1 => paimon-spark4-common}/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRowWithBlob.scala (100%) diff --git a/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala b/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala deleted file mode 100644 index 714abd0a6aa9..000000000000 --- a/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.paimon.shims - -import org.apache.paimon.spark.data.{Spark4ArrayData, Spark4InternalRow, Spark4InternalRowWithBlob, SparkArrayData, SparkInternalRow} -import org.apache.paimon.types.{DataType, RowType} - -import org.apache.hadoop.fs.Path -import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.catalyst.expressions.Expression -import org.apache.spark.sql.catalyst.plans.logical.MergeRows -import org.apache.spark.sql.catalyst.plans.logical.MergeRows.Instruction -import org.apache.spark.sql.execution.datasources._ -import org.apache.spark.sql.execution.streaming.runtime.MetadataLogFileIndex -import org.apache.spark.sql.execution.streaming.sinks.FileStreamSink -import org.apache.spark.sql.types.StructType -import org.apache.spark.sql.util.CaseInsensitiveStringMap - -import scala.collection.JavaConverters._ - -object MinorVersionShim { - - def createKeep(context: String, condition: Expression, output: Seq[Expression]): Instruction = { - val ctx = context match { - case "COPY" => MergeRows.Copy - case "DELETE" => MergeRows.Delete - case "INSERT" => MergeRows.Insert - case "UPDATE" => MergeRows.Update - case _ => MergeRows.Copy - } - - MergeRows.Keep(ctx, condition, output) - } - - def createSparkInternalRow(rowType: RowType): SparkInternalRow = { - new Spark4InternalRow(rowType) - } - - def createSparkInternalRowWithBlob( - rowType: RowType, - blobFieldIndex: Int, - blobAsDescriptor: Boolean): SparkInternalRow = { - new Spark4InternalRowWithBlob(rowType, blobFieldIndex, blobAsDescriptor) - } - - def createSparkArrayData(elementType: DataType): SparkArrayData = { - new Spark4ArrayData(elementType) - } - - def createFileIndex( - options: CaseInsensitiveStringMap, - sparkSession: SparkSession, - paths: Seq[String], - userSpecifiedSchema: Option[StructType], - partitionSchema: StructType): PartitioningAwareFileIndex = { - - class PartitionedMetadataLogFileIndex( - sparkSession: SparkSession, - path: Path, - parameters: Map[String, String], - userSpecifiedSchema: Option[StructType], - override val partitionSchema: StructType) - extends MetadataLogFileIndex(sparkSession, path, parameters, userSpecifiedSchema) - - class PartitionedInMemoryFileIndex( - sparkSession: SparkSession, - rootPathsSpecified: Seq[Path], - parameters: Map[String, String], - userSpecifiedSchema: Option[StructType], - fileStatusCache: FileStatusCache = NoopCache, - userSpecifiedPartitionSpec: Option[PartitionSpec] = None, - metadataOpsTimeNs: Option[Long] = None, - override val partitionSchema: StructType) - extends InMemoryFileIndex( - sparkSession, - rootPathsSpecified, - parameters, - userSpecifiedSchema, - fileStatusCache, - userSpecifiedPartitionSpec, - metadataOpsTimeNs) - - def globPaths: Boolean = { - val entry = options.get(DataSource.GLOB_PATHS_KEY) - Option(entry).forall(_ == "true") - } - - val caseSensitiveMap = options.asCaseSensitiveMap.asScala.toMap - val hadoopConf = sparkSession.sessionState.newHadoopConfWithOptions(caseSensitiveMap) - if (FileStreamSink.hasMetadata(paths, hadoopConf, sparkSession.sessionState.conf)) { - new PartitionedMetadataLogFileIndex( - sparkSession, - new Path(paths.head), - options.asScala.toMap, - userSpecifiedSchema, - partitionSchema = partitionSchema) - } else { - val rootPathsSpecified = DataSource.checkAndGlobPathIfNecessary( - paths, - hadoopConf, - checkEmptyGlobPath = true, - checkFilesExist = true, - enableGlobbing = globPaths) - val fileStatusCache = FileStatusCache.getOrCreate(sparkSession) - - new PartitionedInMemoryFileIndex( 
- sparkSession, - rootPathsSpecified, - caseSensitiveMap, - userSpecifiedSchema, - fileStatusCache, - partitionSchema = partitionSchema) - } - } - -} diff --git a/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/paimon/spark/data/Spark4ArrayData.scala b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4ArrayData.scala similarity index 100% rename from paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/paimon/spark/data/Spark4ArrayData.scala rename to paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4ArrayData.scala diff --git a/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRow.scala b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRow.scala similarity index 100% rename from paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRow.scala rename to paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRow.scala diff --git a/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRowWithBlob.scala b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRowWithBlob.scala similarity index 100% rename from paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRowWithBlob.scala rename to paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRowWithBlob.scala diff --git a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala index 551d53cdefff..714abd0a6aa9 100644 --- a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala +++ b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala @@ -18,44 +18,115 @@ package org.apache.spark.sql.paimon.shims -import org.apache.paimon.spark.data.{SparkArrayData, SparkInternalRow} +import org.apache.paimon.spark.data.{Spark4ArrayData, Spark4InternalRow, Spark4InternalRowWithBlob, SparkArrayData, SparkInternalRow} import org.apache.paimon.types.{DataType, RowType} +import org.apache.hadoop.fs.Path import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.plans.logical.MergeRows import org.apache.spark.sql.catalyst.plans.logical.MergeRows.Instruction import org.apache.spark.sql.execution.datasources._ +import org.apache.spark.sql.execution.streaming.runtime.MetadataLogFileIndex +import org.apache.spark.sql.execution.streaming.sinks.FileStreamSink import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap +import scala.collection.JavaConverters._ + object MinorVersionShim { def createKeep(context: String, condition: Expression, output: Seq[Expression]): Instruction = { - throw new UnsupportedOperationException("Not implemented") - } + val ctx = context match { + case "COPY" => MergeRows.Copy + case "DELETE" => MergeRows.Delete + case "INSERT" => MergeRows.Insert + case "UPDATE" => MergeRows.Update + case _ => MergeRows.Copy + } - def createFileIndex( - options: CaseInsensitiveStringMap, - sparkSession: SparkSession, - paths: Seq[String], - userSpecifiedSchema: Option[StructType], - partitionSchema: StructType): 
PartitioningAwareFileIndex = { - throw new UnsupportedOperationException("Not implemented") + MergeRows.Keep(ctx, condition, output) } def createSparkInternalRow(rowType: RowType): SparkInternalRow = { - throw new UnsupportedOperationException("Not implemented") + new Spark4InternalRow(rowType) } def createSparkInternalRowWithBlob( rowType: RowType, blobFieldIndex: Int, blobAsDescriptor: Boolean): SparkInternalRow = { - throw new UnsupportedOperationException("Not implemented") + new Spark4InternalRowWithBlob(rowType, blobFieldIndex, blobAsDescriptor) } def createSparkArrayData(elementType: DataType): SparkArrayData = { - throw new UnsupportedOperationException("Not implemented") + new Spark4ArrayData(elementType) + } + + def createFileIndex( + options: CaseInsensitiveStringMap, + sparkSession: SparkSession, + paths: Seq[String], + userSpecifiedSchema: Option[StructType], + partitionSchema: StructType): PartitioningAwareFileIndex = { + + class PartitionedMetadataLogFileIndex( + sparkSession: SparkSession, + path: Path, + parameters: Map[String, String], + userSpecifiedSchema: Option[StructType], + override val partitionSchema: StructType) + extends MetadataLogFileIndex(sparkSession, path, parameters, userSpecifiedSchema) + + class PartitionedInMemoryFileIndex( + sparkSession: SparkSession, + rootPathsSpecified: Seq[Path], + parameters: Map[String, String], + userSpecifiedSchema: Option[StructType], + fileStatusCache: FileStatusCache = NoopCache, + userSpecifiedPartitionSpec: Option[PartitionSpec] = None, + metadataOpsTimeNs: Option[Long] = None, + override val partitionSchema: StructType) + extends InMemoryFileIndex( + sparkSession, + rootPathsSpecified, + parameters, + userSpecifiedSchema, + fileStatusCache, + userSpecifiedPartitionSpec, + metadataOpsTimeNs) + + def globPaths: Boolean = { + val entry = options.get(DataSource.GLOB_PATHS_KEY) + Option(entry).forall(_ == "true") + } + + val caseSensitiveMap = options.asCaseSensitiveMap.asScala.toMap + val hadoopConf = sparkSession.sessionState.newHadoopConfWithOptions(caseSensitiveMap) + if (FileStreamSink.hasMetadata(paths, hadoopConf, sparkSession.sessionState.conf)) { + new PartitionedMetadataLogFileIndex( + sparkSession, + new Path(paths.head), + options.asScala.toMap, + userSpecifiedSchema, + partitionSchema = partitionSchema) + } else { + val rootPathsSpecified = DataSource.checkAndGlobPathIfNecessary( + paths, + hadoopConf, + checkEmptyGlobPath = true, + checkFilesExist = true, + enableGlobbing = globPaths) + val fileStatusCache = FileStatusCache.getOrCreate(sparkSession) + + new PartitionedInMemoryFileIndex( + sparkSession, + rootPathsSpecified, + caseSensitiveMap, + userSpecifiedSchema, + fileStatusCache, + partitionSchema = partitionSchema) + } } } From d60b2d5c64f4a4f17ed274eff226512b1b2b8b1e Mon Sep 17 00:00:00 2001 From: sychen Date: Fri, 26 Dec 2025 15:27:39 +0800 Subject: [PATCH 24/35] test --- .../test/scala/org/apache/paimon/spark/PaimonSparkTestBase.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSparkTestBase.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSparkTestBase.scala index 497c00f19114..f1415266a231 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSparkTestBase.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSparkTestBase.scala @@ -76,6 +76,7 @@ class PaimonSparkTestBase .set("spark.sql.catalog.paimon.warehouse", 
tempDBDir.getCanonicalPath) .set("spark.sql.extensions", classOf[PaimonSparkSessionExtensions].getName) .set("spark.serializer", serializer) + .set("spark.paimon.write.use-v2-write", "true") } override protected def beforeAll(): Unit = { From 0b31a724de9b3d3574e9df60601ca01b86627df1 Mon Sep 17 00:00:00 2001 From: sychen Date: Fri, 26 Dec 2025 18:28:47 +0800 Subject: [PATCH 25/35] Revert "test" This reverts commit d60b2d5c64f4a4f17ed274eff226512b1b2b8b1e. --- .../test/scala/org/apache/paimon/spark/PaimonSparkTestBase.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSparkTestBase.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSparkTestBase.scala index f1415266a231..497c00f19114 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSparkTestBase.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSparkTestBase.scala @@ -76,7 +76,6 @@ class PaimonSparkTestBase .set("spark.sql.catalog.paimon.warehouse", tempDBDir.getCanonicalPath) .set("spark.sql.extensions", classOf[PaimonSparkSessionExtensions].getName) .set("spark.serializer", serializer) - .set("spark.paimon.write.use-v2-write", "true") } override protected def beforeAll(): Unit = { From 3ac086b1b33a80d392c57878424de8ff17320e5b Mon Sep 17 00:00:00 2001 From: sychen Date: Fri, 26 Dec 2025 18:30:18 +0800 Subject: [PATCH 26/35] test --- .../src/main/scala/org/apache/paimon/spark/SparkTable.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/SparkTable.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/SparkTable.scala index 740e7b5994b7..8ead54047f2b 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/SparkTable.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/SparkTable.scala @@ -32,7 +32,7 @@ case class SparkTable(override val table: Table) override def newRowLevelOperationBuilder( info: RowLevelOperationInfo): RowLevelOperationBuilder = { table match { - case t: FileStoreTable if useV2Write => + case t: FileStoreTable => () => new PaimonSparkCopyOnWriteOperation(t, info) case _ => throw new UnsupportedOperationException( From 5ec2588f603dd70111c0121d58ded7eea03e0660 Mon Sep 17 00:00:00 2001 From: sychen Date: Fri, 26 Dec 2025 19:46:25 +0800 Subject: [PATCH 27/35] Revert "test" This reverts commit 3ac086b1b33a80d392c57878424de8ff17320e5b. 
--- .../src/main/scala/org/apache/paimon/spark/SparkTable.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/SparkTable.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/SparkTable.scala index 8ead54047f2b..740e7b5994b7 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/SparkTable.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/SparkTable.scala @@ -32,7 +32,7 @@ case class SparkTable(override val table: Table) override def newRowLevelOperationBuilder( info: RowLevelOperationInfo): RowLevelOperationBuilder = { table match { - case t: FileStoreTable => + case t: FileStoreTable if useV2Write => () => new PaimonSparkCopyOnWriteOperation(t, info) case _ => throw new UnsupportedOperationException( From dd2909cee9be99fd3819e42c5e6b8a7a0eb63c4f Mon Sep 17 00:00:00 2001 From: sychen Date: Fri, 26 Dec 2025 19:47:08 +0800 Subject: [PATCH 28/35] test --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 9270bc5a26bb..e6cc3924348c 100644 --- a/pom.xml +++ b/pom.xml @@ -435,8 +435,8 @@ under the License. 4.1.0 paimon-spark4-common_2.13 18.1.0 - 4.1 - 4.1.0 + 4.0 + 4.0.1 From b688c16591061d6f4236a170e7a3acaf1afab0cf Mon Sep 17 00:00:00 2001 From: sychen Date: Sat, 27 Dec 2025 17:26:24 +0800 Subject: [PATCH 29/35] test --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index e6cc3924348c..051908b4d25b 100644 --- a/pom.xml +++ b/pom.xml @@ -432,7 +432,7 @@ under the License. 4.13.1 2.13 ${scala213.version} - 4.1.0 + 4.0.1 paimon-spark4-common_2.13 18.1.0 4.0 From 52a14ea4098e499c0005d4eef4037eec4799a075 Mon Sep 17 00:00:00 2001 From: sychen Date: Sat, 27 Dec 2025 18:20:53 +0800 Subject: [PATCH 30/35] Revert "test" This reverts commit b688c16591061d6f4236a170e7a3acaf1afab0cf. --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 051908b4d25b..e6cc3924348c 100644 --- a/pom.xml +++ b/pom.xml @@ -432,7 +432,7 @@ under the License. 4.13.1 2.13 ${scala213.version} - 4.0.1 + 4.1.0 paimon-spark4-common_2.13 18.1.0 4.0 From 00012a22d254c5ecf2a18004e205981ab2e44a82 Mon Sep 17 00:00:00 2001 From: sychen Date: Sat, 27 Dec 2025 18:22:41 +0800 Subject: [PATCH 31/35] Revert "test" This reverts commit dd2909cee9be99fd3819e42c5e6b8a7a0eb63c4f. --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index e6cc3924348c..9270bc5a26bb 100644 --- a/pom.xml +++ b/pom.xml @@ -435,8 +435,8 @@ under the License. 
4.1.0 paimon-spark4-common_2.13 18.1.0 - 4.0 - 4.0.1 + 4.1 + 4.1.0 From 1ab4172f68a593dd35f5fd042bf3a711ef98453f Mon Sep 17 00:00:00 2001 From: sychen Date: Sat, 27 Dec 2025 18:49:35 +0800 Subject: [PATCH 32/35] suffix 4.1 --- .github/workflows/utitcase-spark-4.x.yml | 4 ++-- .../org/apache/spark/sql/paimon/shims/MinorVersionShim.scala | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/utitcase-spark-4.x.yml b/.github/workflows/utitcase-spark-4.x.yml index 5b877803068b..024dcc67ebb0 100644 --- a/.github/workflows/utitcase-spark-4.x.yml +++ b/.github/workflows/utitcase-spark-4.x.yml @@ -61,10 +61,10 @@ jobs: jvm_timezone=$(random_timezone) echo "JVM timezone is set to $jvm_timezone" test_modules="" - for suffix in ut 4.0; do + for suffix in ut 4.1 4.0; do test_modules+="org.apache.paimon:paimon-spark-${suffix}_2.13," done test_modules="${test_modules%,}" mvn -T 2C -B verify -pl "${test_modules}" -Duser.timezone=$jvm_timezone -Pspark4,flink1 env: - MAVEN_OPTS: -Xmx4096m \ No newline at end of file + MAVEN_OPTS: -Xmx4096m diff --git a/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala index 06de30e4af06..c069c02dc5b7 100644 --- a/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala +++ b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/MinorVersionShim.scala @@ -44,5 +44,4 @@ object MinorVersionShim { notMatchedActions, notMatchedBySourceActions) } - } From 280ac37e34b47cb27d449a2afe71fd2909ff1050 Mon Sep 17 00:00:00 2001 From: sychen Date: Sat, 27 Dec 2025 20:54:38 +0800 Subject: [PATCH 33/35] rewrite ReplaceData --- .../org/apache/paimon/spark/SparkTable.scala | 4 +- .../catalyst/analysis/PaimonDeleteTable.scala | 18 +++++- .../catalyst/analysis/PaimonRelation.scala | 27 +++++++++ .../PaimonSpark41CopyOnWriteOperation.scala | 58 +++++++++++++++++++ 4 files changed, 105 insertions(+), 2 deletions(-) create mode 100644 paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/rowops/PaimonSpark41CopyOnWriteOperation.scala diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/SparkTable.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/SparkTable.scala index 740e7b5994b7..0538b7cca426 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/SparkTable.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/SparkTable.scala @@ -18,7 +18,7 @@ package org.apache.paimon.spark -import org.apache.paimon.spark.rowops.PaimonSparkCopyOnWriteOperation +import org.apache.paimon.spark.rowops.{PaimonSpark41CopyOnWriteOperation, PaimonSparkCopyOnWriteOperation} import org.apache.paimon.table.{FileStoreTable, Table} import org.apache.spark.sql.connector.catalog.SupportsRowLevelOperations @@ -34,6 +34,8 @@ case class SparkTable(override val table: Table) table match { case t: FileStoreTable if useV2Write => () => new PaimonSparkCopyOnWriteOperation(t, info) + case t: FileStoreTable if org.apache.spark.SPARK_VERSION >= "4.1" => + () => new PaimonSpark41CopyOnWriteOperation(t, info) case _ => throw new UnsupportedOperationException( s"Write operation is only supported for FileStoreTable with V2 write enabled. 
" + diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonDeleteTable.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonDeleteTable.scala index 6808e64c4550..2d6bfd78f9a6 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonDeleteTable.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonDeleteTable.scala @@ -24,7 +24,7 @@ import org.apache.paimon.spark.commands.DeleteFromPaimonTableCommand import org.apache.paimon.table.FileStoreTable import org.apache.spark.sql.catalyst.expressions.Expression -import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable, LogicalPlan, ReplaceData} import org.apache.spark.sql.catalyst.rules.Rule object PaimonDeleteTable extends Rule[LogicalPlan] with RowLevelHelper { @@ -48,6 +48,22 @@ object PaimonDeleteTable extends Rule[LogicalPlan] with RowLevelHelper { override def apply(plan: LogicalPlan): LogicalPlan = { plan.resolveOperators { + case d @ ReplaceData(PaimonRelation(table), condition, _, _, _, _, _) + if d.resolved && shouldFallbackToV1Delete(table, condition) => + checkPaimonTable(table.getTable) + + table.getTable match { + case paimonTable: FileStoreTable => + val relation = PaimonRelation.getPaimonRelation(d.table) + if (paimonTable.coreOptions().dataEvolutionEnabled()) { + throw new RuntimeException( + "Delete operation is not supported when data evolution is enabled yet.") + } + DeleteFromPaimonTableCommand(relation, paimonTable, condition) + + case _ => + throw new RuntimeException("Delete Operation is only supported for FileStoreTable.") + } case d @ DeleteFromTable(PaimonRelation(table), condition) if d.resolved && shouldFallbackToV1Delete(table, condition) => checkPaimonTable(table.getTable) diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonRelation.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonRelation.scala index 0ba17e2006cb..2a5f67c32daf 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonRelation.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonRelation.scala @@ -23,6 +23,7 @@ import org.apache.paimon.spark.SparkTable import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, ResolvedTable} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} +import org.apache.spark.sql.connector.catalog.Table import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import scala.util.control.NonFatal @@ -36,6 +37,8 @@ object PaimonRelation extends Logging { Some(d.table.asInstanceOf[SparkTable]) case d: DataSourceV2Relation if d.table.isInstanceOf[SparkTable] => Some(d.table.asInstanceOf[SparkTable]) + case d: DataSourceV2Relation if isRowLevelOpTable(d.table) => + Some(getTableFromRowLevelOpTable(d.table)) case ResolvedTable(_, _, table: SparkTable, _) => Some(table) case _ => None } @@ -54,7 +57,31 @@ object PaimonRelation extends Logging { EliminateSubqueryAliases(plan) match { case Project(_, d: DataSourceV2Relation) if d.table.isInstanceOf[SparkTable] => d case d: DataSourceV2Relation if d.table.isInstanceOf[SparkTable] => 
d + case d: DataSourceV2Relation if isRowLevelOpTable(d.table) => + d.copy(table = getTableFromRowLevelOpTable(d.table)) case _ => throw new RuntimeException(s"It's not a paimon table, $plan") } } + + private def isRowLevelOpTable(table: Table) = { + if (table.getClass.getName == "org.apache.spark.sql.connector.write.RowLevelOperationTable") { + val clazz = Class.forName("org.apache.spark.sql.connector.write.RowLevelOperationTable") + val method = clazz.getMethod("table") + val innerTable = method.invoke(table).asInstanceOf[Table] + innerTable.isInstanceOf[SparkTable] + } else { + false + } + } + + private def getTableFromRowLevelOpTable(table: Table) = { + if (table.getClass.getName == "org.apache.spark.sql.connector.write.RowLevelOperationTable") { + val clazz = Class.forName("org.apache.spark.sql.connector.write.RowLevelOperationTable") + val method = clazz.getMethod("table") + val innerTable = method.invoke(table).asInstanceOf[Table] + innerTable.asInstanceOf[SparkTable] + } else { + table.asInstanceOf[SparkTable] + } + } } diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/rowops/PaimonSpark41CopyOnWriteOperation.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/rowops/PaimonSpark41CopyOnWriteOperation.scala new file mode 100644 index 000000000000..a41aefc2745b --- /dev/null +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/rowops/PaimonSpark41CopyOnWriteOperation.scala @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.spark.rowops + +import org.apache.paimon.spark.PaimonBaseScanBuilder +import org.apache.paimon.spark.schema.PaimonMetadataColumn.FILE_PATH_COLUMN +import org.apache.paimon.table.FileStoreTable + +import org.apache.spark.sql.connector.expressions.{Expressions, NamedReference} +import org.apache.spark.sql.connector.read.{Scan, ScanBuilder} +import org.apache.spark.sql.connector.write.{LogicalWriteInfo, RowLevelOperation, RowLevelOperationInfo, WriteBuilder} +import org.apache.spark.sql.util.CaseInsensitiveStringMap + +class PaimonSpark41CopyOnWriteOperation(table: FileStoreTable, info: RowLevelOperationInfo) + extends RowLevelOperation { + + private var copyOnWriteScan: Option[PaimonCopyOnWriteScan] = None + + override def command(): RowLevelOperation.Command = info.command() + + override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = { + new PaimonBaseScanBuilder { + override lazy val table: FileStoreTable = + PaimonSpark41CopyOnWriteOperation.this.table.copy(options.asCaseSensitiveMap) + + override def build(): Scan = { + val scan = + PaimonCopyOnWriteScan(table, requiredSchema, pushedPartitionFilters, pushedDataFilters) + PaimonSpark41CopyOnWriteOperation.this.copyOnWriteScan = Option(scan) + scan + } + } + } + + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = { + new WriteBuilder {} + } + + override def requiredMetadataAttributes(): Array[NamedReference] = { + Array(Expressions.column(FILE_PATH_COLUMN)) + } +} From 9c5b560cdf5ed60011c2872251c02d090a5da5a4 Mon Sep 17 00:00:00 2001 From: sychen Date: Sat, 27 Dec 2025 21:20:17 +0800 Subject: [PATCH 34/35] compile ReplaceData --- .../spark/catalyst/analysis/PaimonDeleteTable.scala | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonDeleteTable.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonDeleteTable.scala index 2d6bfd78f9a6..81da473fb731 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonDeleteTable.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonDeleteTable.scala @@ -47,9 +47,15 @@ object PaimonDeleteTable extends Rule[LogicalPlan] with RowLevelHelper { override val operation: RowLevelOp = Delete override def apply(plan: LogicalPlan): LogicalPlan = { + def isPaimonTable(d: ReplaceData): Boolean = { + val maybeTable = PaimonRelation.unapply(d.table) + maybeTable.exists(table => shouldFallbackToV1Delete(table, d.condition)) + } + plan.resolveOperators { - case d @ ReplaceData(PaimonRelation(table), condition, _, _, _, _, _) - if d.resolved && shouldFallbackToV1Delete(table, condition) => + case d: ReplaceData if d.resolved && isPaimonTable(d) => + val condition = d.condition + val table = PaimonRelation.unapply(d.table).get checkPaimonTable(table.getTable) table.getTable match { From 1e42fae59429a9cbf4adbc1b387ff872768d9cab Mon Sep 17 00:00:00 2001 From: sychen Date: Sat, 27 Dec 2025 23:30:35 +0800 Subject: [PATCH 35/35] v2 write --- .../paimon/spark/sql/V2WriteRequireDistributionTest.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/V2WriteRequireDistributionTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/V2WriteRequireDistributionTest.scala 
index 02a5b9a83015..2b147dbf93fa 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/V2WriteRequireDistributionTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/V2WriteRequireDistributionTest.scala @@ -49,7 +49,7 @@ class V2WriteRequireDistributionTest extends PaimonSparkTestBase with AdaptiveSp val node1 = nodes(0) assert( node1.isInstanceOf[AppendDataExec] && - node1.toString.contains("PaimonWrite(table=test.t1"), + node1.asInstanceOf[AppendDataExec].write.toString.contains("PaimonWrite(table=test.t1"), s"Expected AppendDataExec with specific paimon write, but got: $node1" ) @@ -92,7 +92,7 @@ class V2WriteRequireDistributionTest extends PaimonSparkTestBase with AdaptiveSp val node1 = nodes(0) assert( node1.isInstanceOf[AppendDataExec] && - node1.toString.contains("PaimonWrite(table=test.t1"), + node1.asInstanceOf[AppendDataExec].write.toString.contains("PaimonWrite(table=test.t1"), s"Expected AppendDataExec with specific paimon write, but got: $node1" )