What went wrong?
If you add spaces between the column names in the columnsToIndex property, an error is thrown saying that one of the columns does not exist, even though the column is present in the schema (ss_item_sk appears in the "Available" list of the error below).
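A minimal sketch of what the stack trace suggests is happening (the splitting code here is an assumption for illustration, not the actual qbeast code): the property value appears to be split on "," without trimming, so the second name keeps its leading space and the StructType lookup throws.

    import org.apache.spark.sql.types.{IntegerType, StructField, StructType}

    val schema = StructType(Seq(
      StructField("ss_ticket_number", IntegerType),
      StructField("ss_item_sk", IntegerType)
    ))

    // Splitting the property value on "," keeps the leading space:
    val names = "ss_ticket_number, ss_item_sk".split(",")
    // names == Array("ss_ticket_number", " ss_item_sk")

    // StructType.apply(name) throws IllegalArgumentException for " ss_item_sk"
    // (with the leading space), which prints as "ss_item_sk does not exist":
    names.map(n => schema(n))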
How to reproduce?
1. Code that triggered the bug, or steps to reproduce:
Run this SQL (reconstructed as the Scala snippet that the | margins and the $tablePath interpolation imply; a space-free workaround is sketched after the stack trace):

    spark.sql(
      s"""CREATE EXTERNAL TABLE store_sales_qbeast
         |USING qbeast
         |LOCATION '$tablePath'
         |TBLPROPERTIES('columnsToIndex'='ss_ticket_number, ss_item_sk', 'cubeSize'='1000000')
         |SELECT * FROM input_table
         |""".stripMargin)

2. Branch and commit id:
0.9.0
3. Spark version:
3.5.2
4. How are you running Spark?
Spark shell in a workspace
5. Stack trace:
java.lang.IllegalArgumentException: ss_item_sk does not exist. Available: ss_sold_date_sk, ss_sold_time_sk, ss_item_sk, ss_customer_sk, ss_cdemo_sk, ss_hdemo_sk, ss_addr_sk, ss_store_sk, ss_promo_sk, ss_ticket_number, ss_quantity, ss_wholesale_cost, ss_list_price, ss_sales_price, ss_ext_discount_amt, ss_ext_sales_price, ss_ext_wholesale_cost, ss_ext_list_price, ss_ext_tax, ss_coupon_amt, ss_net_paid, ss_net_paid_inc_tax, ss_net_profit
at org.apache.spark.sql.types.StructType.$anonfun$apply$1(StructType.scala:282)
at scala.collection.immutable.HashMap$HashTrieMap.getOrElse0(HashMap.scala:596)
at scala.collection.immutable.HashMap.getOrElse(HashMap.scala:73)
at org.apache.spark.sql.types.StructType.apply(StructType.scala:281)
at io.qbeast.core.model.ColumnToIndexUtils$.getColumnQType(ColumnToIndexUtils.scala:28)
at io.qbeast.core.model.ColumnToIndex.toTransformer(ColumnToIndex.scala:31)
at io.qbeast.spark.index.SparkRevisionFactory$.$anonfun$createNewRevision$1(SparkRevisionFactory.scala:49)
at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at scala.collection.TraversableLike.map(TraversableLike.scala:286)
at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
at scala.collection.AbstractTraversable.map(Traversable.scala:108)
at io.qbeast.spark.index.SparkRevisionFactory$.createNewRevision(SparkRevisionFactory.scala:49)
at io.qbeast.table.IndexedTableImpl.save(IndexedTable.scala:299)
at io.qbeast.catalog.QbeastCatalogUtils.updateLog(QbeastCatalogUtils.scala:197)
at io.qbeast.catalog.QbeastCatalogUtils.createQbeastTable(QbeastCatalogUtils.scala:297)
at io.qbeast.catalog.QbeastCatalogUtils.createQbeastTable$(QbeastCatalogUtils.scala:233)
at io.qbeast.sources.v2.QbeastStagedTableImpl.createQbeastTable(QbeastStagedTableImpl.scala:44)
at io.qbeast.sources.v2.QbeastStagedTableImpl.commitStagedChanges(QbeastStagedTableImpl.scala:98)
at org.apache.spark.sql.execution.datasources.v2.V2CreateTableAsSelectBaseExec.$anonfun$writeToTable$1(WriteToDataSourceV2Exec.scala:580)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1397)
at org.apache.spark.sql.execution.datasources.v2.V2CreateTableAsSelectBaseExec.writeToTable(WriteToDataSourceV2Exec.scala:573)
at org.apache.spark.sql.execution.datasources.v2.V2CreateTableAsSelectBaseExec.writeToTable$(WriteToDataSourceV2Exec.scala:567)
at org.apache.spark.sql.execution.datasources.v2.AtomicCreateTableAsSelectExec.writeToTable(WriteToDataSourceV2Exec.scala:100)
at org.apache.spark.sql.execution.datasources.v2.AtomicCreateTableAsSelectExec.run(WriteToDataSourceV2Exec.scala:121)
at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result$lzycompute(V2CommandExec.scala:43)
at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result(V2CommandExec.scala:43)
at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.executeCollect(V2CommandExec.scala:49)
at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:107)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:125)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:201)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:108)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:66)
at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:107)
at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:461)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:461)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:32)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:437)
at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:98)
at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:85)
at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:83)
at org.apache.spark.sql.Dataset.<init>(Dataset.scala:220)
at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)
at org.apache.spark.sql.SparkSession.$anonfun$sql$4(SparkSession.scala:691)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:682)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:713)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:744)
... 60 elided
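Until the parsing trims the names, a workaround is to write the property value without spaces. Below is a hedged sketch of both the workaround and a possible fix; the parseColumnsToIndex helper is hypothetical, not the actual qbeast parsing code.

    // Workaround: no spaces between the column names
    // TBLPROPERTIES('columnsToIndex'='ss_ticket_number,ss_item_sk', 'cubeSize'='1000000')

    // Hypothetical fix sketch: trim each name after splitting the property value
    def parseColumnsToIndex(value: String): Seq[String] =
      value.split(",").map(_.trim).filter(_.nonEmpty).toSeq

    parseColumnsToIndex("ss_ticket_number, ss_item_sk")
    // => Seq("ss_ticket_number", "ss_item_sk")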