From 861981c4aca92205955791c88903e40544113314 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 8 Dec 2025 07:51:39 +0100 Subject: [PATCH 1/2] Update 6 crates to rust edition 2024 --- datafusion/datasource/Cargo.toml | 2 +- .../benches/split_groups_by_statistics.rs | 2 +- datafusion/datasource/src/decoder.rs | 28 ++- datafusion/datasource/src/display.rs | 2 +- datafusion/datasource/src/file.rs | 4 +- .../datasource/src/file_compression_type.rs | 12 +- datafusion/datasource/src/file_format.rs | 2 +- datafusion/datasource/src/file_groups.rs | 2 +- datafusion/datasource/src/file_scan_config.rs | 74 +++--- datafusion/datasource/src/file_sink_config.rs | 4 +- datafusion/datasource/src/file_stream.rs | 21 +- datafusion/datasource/src/memory.rs | 21 +- datafusion/datasource/src/mod.rs | 11 +- datafusion/datasource/src/projection.rs | 17 +- datafusion/datasource/src/schema_adapter.rs | 14 +- datafusion/datasource/src/sink.rs | 6 +- datafusion/datasource/src/statistics.rs | 4 +- datafusion/datasource/src/test_util.rs | 2 +- datafusion/datasource/src/url.rs | 18 +- datafusion/datasource/src/write/demux.rs | 16 +- datafusion/datasource/src/write/mod.rs | 2 +- .../datasource/src/write/orchestration.rs | 22 +- datafusion/functions-nested/Cargo.toml | 2 +- .../functions-nested/benches/array_slice.rs | 4 +- datafusion/functions-nested/benches/map.rs | 6 +- datafusion/functions-nested/src/array_has.rs | 26 +- .../functions-nested/src/cardinality.rs | 4 +- datafusion/functions-nested/src/concat.rs | 6 +- datafusion/functions-nested/src/dimension.rs | 2 +- datafusion/functions-nested/src/distance.rs | 4 +- datafusion/functions-nested/src/empty.rs | 2 +- datafusion/functions-nested/src/except.rs | 6 +- datafusion/functions-nested/src/extract.rs | 14 +- datafusion/functions-nested/src/flatten.rs | 2 +- datafusion/functions-nested/src/length.rs | 2 +- datafusion/functions-nested/src/make_array.rs | 8 +- datafusion/functions-nested/src/map.rs | 2 +- .../functions-nested/src/map_entries.rs | 2 +- .../functions-nested/src/map_extract.rs | 4 +- datafusion/functions-nested/src/map_keys.rs | 2 +- datafusion/functions-nested/src/map_values.rs | 2 +- datafusion/functions-nested/src/min_max.rs | 4 +- datafusion/functions-nested/src/planner.rs | 6 +- datafusion/functions-nested/src/position.rs | 6 +- datafusion/functions-nested/src/range.rs | 14 +- datafusion/functions-nested/src/remove.rs | 6 +- datafusion/functions-nested/src/repeat.rs | 6 +- datafusion/functions-nested/src/replace.rs | 6 +- datafusion/functions-nested/src/resize.rs | 8 +- datafusion/functions-nested/src/reverse.rs | 2 +- datafusion/functions-nested/src/set_ops.rs | 14 +- datafusion/functions-nested/src/sort.rs | 4 +- datafusion/functions-nested/src/string.rs | 76 ++++-- datafusion/functions-nested/src/utils.rs | 2 +- datafusion/optimizer/Cargo.toml | 2 +- .../benches/projection_unnecessary.rs | 4 +- .../src/analyzer/function_rewrite.rs | 2 +- datafusion/optimizer/src/analyzer/mod.rs | 2 +- .../src/analyzer/resolve_grouping_function.rs | 8 +- .../optimizer/src/analyzer/type_coercion.rs | 35 +-- .../optimizer/src/common_subexpr_eliminate.rs | 14 +- datafusion/optimizer/src/decorrelate.rs | 6 +- .../optimizer/src/decorrelate_lateral_join.rs | 4 +- .../src/decorrelate_predicate_subquery.rs | 10 +- .../optimizer/src/eliminate_cross_join.rs | 17 +- .../src/eliminate_duplicated_expr.rs | 4 +- datafusion/optimizer/src/eliminate_filter.rs | 4 +- .../src/eliminate_group_by_constant.rs | 8 +- datafusion/optimizer/src/eliminate_join.rs | 4 +- datafusion/optimizer/src/eliminate_limit.rs | 6 +- .../optimizer/src/eliminate_outer_join.rs | 4 +- .../src/extract_equijoin_predicate.rs | 4 +- .../optimizer/src/filter_null_join_keys.rs | 6 +- datafusion/optimizer/src/join_key_set.rs | 2 +- .../optimizer/src/optimize_projections/mod.rs | 28 +-- datafusion/optimizer/src/optimize_unions.rs | 6 +- datafusion/optimizer/src/optimizer.rs | 16 +- datafusion/optimizer/src/plan_signature.rs | 2 +- .../optimizer/src/propagate_empty_relation.rs | 16 +- datafusion/optimizer/src/push_down_filter.rs | 16 +- datafusion/optimizer/src/push_down_limit.rs | 8 +- .../src/replace_distinct_aggregate.rs | 4 +- .../optimizer/src/scalar_subquery_to_join.rs | 48 ++-- .../simplify_expressions/expr_simplifier.rs | 34 +-- .../simplify_expressions/inlist_simplifier.rs | 92 ++++--- .../src/simplify_expressions/regex.rs | 20 +- .../simplify_expressions/simplify_exprs.rs | 7 +- .../src/simplify_expressions/unwrap_cast.rs | 6 +- .../src/simplify_expressions/utils.rs | 4 +- .../src/single_distinct_to_groupby.rs | 7 +- datafusion/optimizer/src/test/mod.rs | 4 +- datafusion/optimizer/src/test/user_defined.rs | 2 +- datafusion/optimizer/src/utils.rs | 6 +- .../optimizer/tests/optimizer_integration.rs | 2 +- datafusion/physical-plan/Cargo.toml | 2 +- .../benches/aggregate_vectorized.rs | 4 +- .../physical-plan/benches/partial_ordering.rs | 2 +- .../benches/sort_preserving_merge.rs | 4 +- datafusion/physical-plan/benches/spill_io.rs | 4 +- .../src/aggregates/group_values/metrics.rs | 2 +- .../src/aggregates/group_values/mod.rs | 2 +- .../group_values/multi_group_by/boolean.rs | 2 +- .../group_values/multi_group_by/bytes.rs | 10 +- .../group_values/multi_group_by/bytes_view.rs | 4 +- .../group_values/multi_group_by/mod.rs | 14 +- .../group_values/multi_group_by/primitive.rs | 4 +- .../src/aggregates/group_values/row.rs | 2 +- .../group_values/single_group_by/primitive.rs | 6 +- .../physical-plan/src/aggregates/mod.rs | 134 ++++++----- .../src/aggregates/no_grouping.rs | 12 +- .../physical-plan/src/aggregates/row_hash.rs | 34 +-- .../src/aggregates/topk/hash_table.rs | 224 ++++++++++-------- .../physical-plan/src/aggregates/topk/heap.rs | 25 +- .../src/aggregates/topk/priority_map.rs | 4 +- .../src/aggregates/topk_stream.rs | 8 +- datafusion/physical-plan/src/analyze.rs | 4 +- datafusion/physical-plan/src/async_func.rs | 19 +- datafusion/physical-plan/src/coalesce/mod.rs | 2 +- .../physical-plan/src/coalesce_partitions.rs | 6 +- datafusion/physical-plan/src/common.rs | 2 +- datafusion/physical-plan/src/coop.rs | 4 +- datafusion/physical-plan/src/display.rs | 4 +- datafusion/physical-plan/src/empty.rs | 13 +- .../physical-plan/src/execution_plan.rs | 10 +- datafusion/physical-plan/src/explain.rs | 15 +- datafusion/physical-plan/src/filter.rs | 54 +++-- .../physical-plan/src/joins/cross_join.rs | 24 +- .../physical-plan/src/joins/hash_join/exec.rs | 43 ++-- .../joins/hash_join/partitioned_hash_eval.rs | 2 +- .../src/joins/hash_join/shared_bounds.rs | 27 ++- .../src/joins/hash_join/stream.rs | 32 +-- .../physical-plan/src/joins/join_hash_map.rs | 2 +- .../src/joins/nested_loop_join.rs | 124 ++++++---- .../piecewise_merge_join/classic_join.rs | 28 +-- .../src/joins/piecewise_merge_join/exec.rs | 16 +- .../src/joins/sort_merge_join/exec.rs | 20 +- .../src/joins/sort_merge_join/stream.rs | 12 +- .../src/joins/sort_merge_join/tests.rs | 18 +- .../src/joins/stream_join_utils.rs | 96 ++++---- .../src/joins/symmetric_hash_join.rs | 28 +-- .../physical-plan/src/joins/test_utils.rs | 6 +- datafusion/physical-plan/src/joins/utils.rs | 99 ++++---- datafusion/physical-plan/src/lib.rs | 14 +- datafusion/physical-plan/src/limit.rs | 11 +- datafusion/physical-plan/src/memory.rs | 4 +- .../physical-plan/src/metrics/builder.rs | 2 +- datafusion/physical-plan/src/metrics/mod.rs | 10 +- datafusion/physical-plan/src/metrics/value.rs | 9 +- .../physical-plan/src/placeholder_row.rs | 13 +- datafusion/physical-plan/src/projection.rs | 17 +- .../physical-plan/src/recursive_query.rs | 8 +- .../src/repartition/distributor_channels.rs | 4 +- .../physical-plan/src/repartition/mod.rs | 24 +- datafusion/physical-plan/src/sorts/cursor.rs | 4 +- datafusion/physical-plan/src/sorts/merge.rs | 4 +- .../src/sorts/multi_level_merge.rs | 6 +- .../physical-plan/src/sorts/partial_sort.rs | 29 ++- datafusion/physical-plan/src/sorts/sort.rs | 66 +++--- .../src/sorts/sort_preserving_merge.rs | 30 ++- datafusion/physical-plan/src/sorts/stream.rs | 6 +- .../src/sorts/streaming_merge.rs | 2 +- .../src/spill/in_progress_spill_file.rs | 2 +- datafusion/physical-plan/src/spill/mod.rs | 20 +- .../physical-plan/src/spill/spill_manager.rs | 6 +- datafusion/physical-plan/src/stream.rs | 4 +- datafusion/physical-plan/src/streaming.rs | 8 +- datafusion/physical-plan/src/test.rs | 12 +- datafusion/physical-plan/src/test/exec.rs | 12 +- datafusion/physical-plan/src/topk/mod.rs | 18 +- datafusion/physical-plan/src/tree_node.rs | 4 +- datafusion/physical-plan/src/union.rs | 53 +++-- datafusion/physical-plan/src/unnest.rs | 12 +- .../src/windows/bounded_window_agg_exec.rs | 72 +++--- datafusion/physical-plan/src/windows/mod.rs | 56 ++--- .../src/windows/window_agg_exec.rs | 4 +- datafusion/physical-plan/src/work_table.rs | 4 +- datafusion/session/Cargo.toml | 2 +- datafusion/session/src/session.rs | 2 +- datafusion/sql/Cargo.toml | 2 +- datafusion/sql/examples/sql.rs | 6 +- datafusion/sql/src/cte.rs | 13 +- datafusion/sql/src/expr/binary_op.rs | 2 +- datafusion/sql/src/expr/function.rs | 41 ++-- datafusion/sql/src/expr/identifier.rs | 56 ++--- datafusion/sql/src/expr/mod.rs | 24 +- datafusion/sql/src/expr/order_by.rs | 2 +- datafusion/sql/src/expr/subquery.rs | 18 +- datafusion/sql/src/expr/substring.rs | 10 +- datafusion/sql/src/expr/unary_op.rs | 4 +- datafusion/sql/src/expr/value.rs | 34 ++- datafusion/sql/src/parser.rs | 65 ++--- datafusion/sql/src/planner.rs | 22 +- datafusion/sql/src/query.rs | 2 +- datafusion/sql/src/relation/join.rs | 2 +- datafusion/sql/src/relation/mod.rs | 10 +- datafusion/sql/src/select.rs | 82 ++++--- datafusion/sql/src/set_expr.rs | 2 +- datafusion/sql/src/statement.rs | 70 ++++-- datafusion/sql/src/unparser/ast.rs | 10 +- datafusion/sql/src/unparser/dialect.rs | 4 +- datafusion/sql/src/unparser/expr.rs | 45 ++-- .../sql/src/unparser/extension_unparser.rs | 2 +- datafusion/sql/src/unparser/plan.rs | 160 ++++++------- datafusion/sql/src/unparser/rewrite.rs | 13 +- datafusion/sql/src/unparser/utils.rs | 48 ++-- datafusion/sql/src/utils.rs | 45 ++-- datafusion/sql/tests/cases/collection.rs | 10 +- datafusion/sql/tests/cases/diagnostic.rs | 3 +- datafusion/sql/tests/cases/params.rs | 37 +-- datafusion/sql/tests/cases/plan_to_sql.rs | 23 +- datafusion/sql/tests/common/mod.rs | 2 +- datafusion/sql/tests/sql_integration.rs | 37 ++- 212 files changed, 1971 insertions(+), 1716 deletions(-) diff --git a/datafusion/datasource/Cargo.toml b/datafusion/datasource/Cargo.toml index 48bf30f7a448..a41c4d173c88 100644 --- a/datafusion/datasource/Cargo.toml +++ b/datafusion/datasource/Cargo.toml @@ -20,7 +20,7 @@ name = "datafusion-datasource" description = "datafusion-datasource" readme = "README.md" authors.workspace = true -edition.workspace = true +edition = "2024" homepage.workspace = true license.workspace = true repository.workspace = true diff --git a/datafusion/datasource/benches/split_groups_by_statistics.rs b/datafusion/datasource/benches/split_groups_by_statistics.rs index d51fdfc0a6e9..e2ae4a9753df 100644 --- a/datafusion/datasource/benches/split_groups_by_statistics.rs +++ b/datafusion/datasource/benches/split_groups_by_statistics.rs @@ -24,7 +24,7 @@ use datafusion_datasource::{generate_test_files, verify_sort_integrity}; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; -use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; +use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; pub fn compare_split_groups_by_statistics_algorithms(c: &mut Criterion) { let file_schema = Arc::new(Schema::new(vec![Field::new( diff --git a/datafusion/datasource/src/decoder.rs b/datafusion/datasource/src/decoder.rs index 654569f74113..9f9fc0d94bb1 100644 --- a/datafusion/datasource/src/decoder.rs +++ b/datafusion/datasource/src/decoder.rs @@ -24,9 +24,9 @@ use arrow::error::ArrowError; use bytes::Buf; use bytes::Bytes; use datafusion_common::Result; -use futures::stream::BoxStream; use futures::StreamExt as _; -use futures::{ready, Stream}; +use futures::stream::BoxStream; +use futures::{Stream, ready}; use std::collections::VecDeque; use std::fmt; use std::task::Poll; @@ -175,17 +175,19 @@ pub fn deserialize_stream<'a>( mut input: impl Stream> + Unpin + Send + 'a, mut deserializer: impl BatchDeserializer + 'a, ) -> BoxStream<'a, Result> { - futures::stream::poll_fn(move |cx| loop { - match ready!(input.poll_next_unpin(cx)).transpose()? { - Some(b) => _ = deserializer.digest(b), - None => deserializer.finish(), - }; - - return match deserializer.next()? { - DeserializerOutput::RecordBatch(rb) => Poll::Ready(Some(Ok(rb))), - DeserializerOutput::InputExhausted => Poll::Ready(None), - DeserializerOutput::RequiresMoreData => continue, - }; + futures::stream::poll_fn(move |cx| { + loop { + match ready!(input.poll_next_unpin(cx)).transpose()? { + Some(b) => _ = deserializer.digest(b), + None => deserializer.finish(), + }; + + return match deserializer.next()? { + DeserializerOutput::RecordBatch(rb) => Poll::Ready(Some(Ok(rb))), + DeserializerOutput::InputExhausted => Poll::Ready(None), + DeserializerOutput::RequiresMoreData => continue, + }; + } }) .boxed() } diff --git a/datafusion/datasource/src/display.rs b/datafusion/datasource/src/display.rs index c9e979535963..15fe8679acda 100644 --- a/datafusion/datasource/src/display.rs +++ b/datafusion/datasource/src/display.rs @@ -135,7 +135,7 @@ mod tests { use super::*; use datafusion_physical_plan::{DefaultDisplay, VerboseDisplay}; - use object_store::{path::Path, ObjectMeta}; + use object_store::{ObjectMeta, path::Path}; use crate::PartitionedFile; use chrono::Utc; diff --git a/datafusion/datasource/src/file.rs b/datafusion/datasource/src/file.rs index 3668e0e4a77e..e25a2e889e21 100644 --- a/datafusion/datasource/src/file.rs +++ b/datafusion/datasource/src/file.rs @@ -27,12 +27,12 @@ use crate::file_scan_config::FileScanConfig; use crate::file_stream::FileOpener; use crate::schema_adapter::SchemaAdapterFactory; use datafusion_common::config::ConfigOptions; -use datafusion_common::{not_impl_err, Result}; +use datafusion_common::{Result, not_impl_err}; use datafusion_physical_expr::projection::ProjectionExprs; use datafusion_physical_expr::{LexOrdering, PhysicalExpr}; +use datafusion_physical_plan::DisplayFormatType; use datafusion_physical_plan::filter_pushdown::{FilterPushdownPropagation, PushedDown}; use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet; -use datafusion_physical_plan::DisplayFormatType; use object_store::ObjectStore; diff --git a/datafusion/datasource/src/file_compression_type.rs b/datafusion/datasource/src/file_compression_type.rs index 9ca5d8763b74..ebc0e7821f98 100644 --- a/datafusion/datasource/src/file_compression_type.rs +++ b/datafusion/datasource/src/file_compression_type.rs @@ -21,8 +21,8 @@ use std::str::FromStr; use datafusion_common::error::{DataFusionError, Result}; -use datafusion_common::parsers::CompressionTypeVariant::{self, *}; use datafusion_common::GetExt; +use datafusion_common::parsers::CompressionTypeVariant::{self, *}; #[cfg(feature = "compression")] use async_compression::tokio::bufread::{ @@ -39,10 +39,10 @@ use bytes::Bytes; use bzip2::read::MultiBzDecoder; #[cfg(feature = "compression")] use flate2::read::MultiGzDecoder; -use futures::stream::BoxStream; use futures::StreamExt; #[cfg(feature = "compression")] use futures::TryStreamExt; +use futures::stream::BoxStream; #[cfg(feature = "compression")] use liblzma::read::XzDecoder; use object_store::buffered::BufWriter; @@ -148,7 +148,7 @@ impl FileCompressionType { GZIP | BZIP2 | XZ | ZSTD => { return Err(DataFusionError::NotImplemented( "Compression feature is not enabled".to_owned(), - )) + )); } UNCOMPRESSED => s.boxed(), }) @@ -173,7 +173,7 @@ impl FileCompressionType { GZIP | BZIP2 | XZ | ZSTD => { return Err(DataFusionError::NotImplemented( "Compression feature is not enabled".to_owned(), - )) + )); } UNCOMPRESSED => Box::new(w), }) @@ -210,7 +210,7 @@ impl FileCompressionType { GZIP | BZIP2 | XZ | ZSTD => { return Err(DataFusionError::NotImplemented( "Compression feature is not enabled".to_owned(), - )) + )); } UNCOMPRESSED => s.boxed(), }) @@ -237,7 +237,7 @@ impl FileCompressionType { GZIP | BZIP2 | XZ | ZSTD => { return Err(DataFusionError::NotImplemented( "Compression feature is not enabled".to_owned(), - )) + )); } UNCOMPRESSED => Box::new(r), }) diff --git a/datafusion/datasource/src/file_format.rs b/datafusion/datasource/src/file_format.rs index bb4ffded8086..54389ecd214e 100644 --- a/datafusion/datasource/src/file_format.rs +++ b/datafusion/datasource/src/file_format.rs @@ -30,7 +30,7 @@ use crate::file_sink_config::FileSinkConfig; use arrow::datatypes::SchemaRef; use datafusion_common::file_options::file_type::FileType; -use datafusion_common::{internal_err, not_impl_err, GetExt, Result, Statistics}; +use datafusion_common::{GetExt, Result, Statistics, internal_err, not_impl_err}; use datafusion_physical_expr::LexRequirement; use datafusion_physical_plan::ExecutionPlan; use datafusion_session::Session; diff --git a/datafusion/datasource/src/file_groups.rs b/datafusion/datasource/src/file_groups.rs index 579241294c71..04cb909625d8 100644 --- a/datafusion/datasource/src/file_groups.rs +++ b/datafusion/datasource/src/file_groups.rs @@ -20,7 +20,7 @@ use crate::{FileRange, PartitionedFile}; use datafusion_common::Statistics; use itertools::Itertools; -use std::cmp::{min, Ordering}; +use std::cmp::{Ordering, min}; use std::collections::BinaryHeap; use std::iter::repeat_with; use std::mem; diff --git a/datafusion/datasource/src/file_scan_config.rs b/datafusion/datasource/src/file_scan_config.rs index 47a594f424b3..13cc9aacae67 100644 --- a/datafusion/datasource/src/file_scan_config.rs +++ b/datafusion/datasource/src/file_scan_config.rs @@ -20,32 +20,32 @@ use crate::file_groups::FileGroup; use crate::{ - display::FileGroupsDisplay, file::FileSource, + PartitionedFile, display::FileGroupsDisplay, file::FileSource, file_compression_type::FileCompressionType, file_stream::FileStream, - source::DataSource, statistics::MinMaxStatistics, PartitionedFile, + source::DataSource, statistics::MinMaxStatistics, }; use arrow::datatypes::FieldRef; use arrow::datatypes::{DataType, Schema, SchemaRef}; use datafusion_common::config::ConfigOptions; use datafusion_common::{ - internal_datafusion_err, internal_err, Constraints, Result, ScalarValue, Statistics, + Constraints, Result, ScalarValue, Statistics, internal_datafusion_err, internal_err, }; use datafusion_execution::{ - object_store::ObjectStoreUrl, SendableRecordBatchStream, TaskContext, + SendableRecordBatchStream, TaskContext, object_store::ObjectStoreUrl, }; use datafusion_expr::Operator; use datafusion_physical_expr::expressions::BinaryExpr; use datafusion_physical_expr::projection::ProjectionExprs; use datafusion_physical_expr::utils::reassign_expr_columns; -use datafusion_physical_expr::{split_conjunction, EquivalenceProperties, Partitioning}; +use datafusion_physical_expr::{EquivalenceProperties, Partitioning, split_conjunction}; use datafusion_physical_expr_adapter::PhysicalExprAdapterFactory; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::LexOrdering; use datafusion_physical_plan::{ - display::{display_orderings, ProjectSchemaDisplay}, + DisplayAs, DisplayFormatType, + display::{ProjectSchemaDisplay, display_orderings}, filter_pushdown::FilterPushdownPropagation, metrics::ExecutionPlanMetricsSet, - DisplayAs, DisplayFormatType, }; use std::{any::Any, fmt::Debug, fmt::Formatter, fmt::Result as FmtResult, sync::Arc}; @@ -303,7 +303,9 @@ impl FileScanConfigBuilder { match self.clone().with_projection_indices(indices) { Ok(builder) => builder, Err(e) => { - warn!("Failed to push down projection in FileScanConfigBuilder::with_projection: {e}"); + warn!( + "Failed to push down projection in FileScanConfigBuilder::with_projection: {e}" + ); self } } @@ -643,16 +645,16 @@ impl DataSource for FileScanConfig { fn partition_statistics(&self, partition: Option) -> Result { if let Some(partition) = partition { // Get statistics for a specific partition - if let Some(file_group) = self.file_groups.get(partition) { - if let Some(stat) = file_group.file_statistics(None) { - // Project the statistics based on the projection - let output_schema = self.projected_schema()?; - return if let Some(projection) = self.file_source.projection() { - projection.project_statistics(stat.clone(), &output_schema) - } else { - Ok(stat.clone()) - }; - } + if let Some(file_group) = self.file_groups.get(partition) + && let Some(stat) = file_group.file_statistics(None) + { + // Project the statistics based on the projection + let output_schema = self.projected_schema()?; + return if let Some(projection) = self.file_source.projection() { + projection.project_statistics(stat.clone(), &output_schema) + } else { + Ok(stat.clone()) + }; } // If no statistics available for this partition, return unknown Ok(Statistics::new_unknown(self.projected_schema()?.as_ref())) @@ -1217,8 +1219,8 @@ mod tests { use std::collections::HashMap; use super::*; - use crate::test_util::col; use crate::TableSchema; + use crate::test_util::col; use crate::{ generate_test_files, test_util::MockSource, tests::aggr_test_schema, verify_sort_integrity, @@ -1226,7 +1228,7 @@ mod tests { use arrow::datatypes::Field; use datafusion_common::stats::Precision; - use datafusion_common::{internal_err, ColumnStatistics}; + use datafusion_common::{ColumnStatistics, internal_err}; use datafusion_expr::{Operator, SortExpr}; use datafusion_physical_expr::create_physical_sort_expr; use datafusion_physical_expr::expressions::{BinaryExpr, Column, Literal}; @@ -1267,7 +1269,7 @@ mod tests { use chrono::TimeZone; use datafusion_common::DFSchema; use datafusion_expr::execution_props::ExecutionProps; - use object_store::{path::Path, ObjectMeta}; + use object_store::{ObjectMeta, path::Path}; struct File { name: &'static str, @@ -1368,12 +1370,16 @@ mod tests { true, )]), files: vec![ - File::new_nullable("0", "2023-01-01", vec![Some((Some(0.00), Some(0.49)))]), + File::new_nullable( + "0", + "2023-01-01", + vec![Some((Some(0.00), Some(0.49)))], + ), File::new_nullable("1", "2023-01-01", vec![Some((Some(0.50), None))]), File::new_nullable("2", "2023-01-02", vec![Some((Some(0.00), None))]), ], sort: vec![col("value").sort(true, false)], - expected_result: Ok(vec![vec!["0", "1"], vec!["2"]]) + expected_result: Ok(vec![vec!["0", "1"], vec!["2"]]), }, TestCase { name: "nullable sort columns, nulls first", @@ -1384,11 +1390,15 @@ mod tests { )]), files: vec![ File::new_nullable("0", "2023-01-01", vec![Some((None, Some(0.49)))]), - File::new_nullable("1", "2023-01-01", vec![Some((Some(0.50), Some(1.00)))]), + File::new_nullable( + "1", + "2023-01-01", + vec![Some((Some(0.50), Some(1.00)))], + ), File::new_nullable("2", "2023-01-02", vec![Some((None, Some(1.00)))]), ], sort: vec![col("value").sort(true, true)], - expected_result: Ok(vec![vec!["0", "1"], vec!["2"]]) + expected_result: Ok(vec![vec!["0", "1"], vec!["2"]]), }, TestCase { name: "all three non-overlapping", @@ -1444,7 +1454,9 @@ mod tests { File::new("2", "2023-01-02", vec![None]), ], sort: vec![col("value").sort(true, false)], - expected_result: Err("construct min/max statistics for split_groups_by_statistics\ncaused by\ncollect min/max values\ncaused by\nget min/max for column: 'value'\ncaused by\nError during planning: statistics not found"), + expected_result: Err( + "construct min/max statistics for split_groups_by_statistics\ncaused by\ncollect min/max values\ncaused by\nget min/max for column: 'value'\ncaused by\nError during planning: statistics not found", + ), }, ]; @@ -1621,10 +1633,12 @@ mod tests { "test.parquet".to_string(), 1024, )])]) - .with_output_ordering(vec![[PhysicalSortExpr::new_default(Arc::new( - Column::new("date", 0), - ))] - .into()]) + .with_output_ordering(vec![ + [PhysicalSortExpr::new_default(Arc::new(Column::new( + "date", 0, + )))] + .into(), + ]) .with_file_compression_type(FileCompressionType::UNCOMPRESSED) .with_newlines_in_values(true) .build(); diff --git a/datafusion/datasource/src/file_sink_config.rs b/datafusion/datasource/src/file_sink_config.rs index 2968bd1ee044..643831a1199f 100644 --- a/datafusion/datasource/src/file_sink_config.rs +++ b/datafusion/datasource/src/file_sink_config.rs @@ -17,10 +17,10 @@ use std::sync::Arc; +use crate::ListingTableUrl; use crate::file_groups::FileGroup; use crate::sink::DataSink; -use crate::write::demux::{start_demuxer_task, DemuxedStreamReceiver}; -use crate::ListingTableUrl; +use crate::write::demux::{DemuxedStreamReceiver, start_demuxer_task}; use arrow::datatypes::{DataType, SchemaRef}; use datafusion_common::Result; diff --git a/datafusion/datasource/src/file_stream.rs b/datafusion/datasource/src/file_stream.rs index 3ac70a5f75cc..c8090382094e 100644 --- a/datafusion/datasource/src/file_stream.rs +++ b/datafusion/datasource/src/file_stream.rs @@ -27,8 +27,8 @@ use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; -use crate::file_scan_config::FileScanConfig; use crate::PartitionedFile; +use crate::file_scan_config::FileScanConfig; use arrow::datatypes::SchemaRef; use datafusion_common::error::Result; use datafusion_execution::RecordBatchStream; @@ -41,7 +41,7 @@ use datafusion_common::instant::Instant; use futures::future::BoxFuture; use futures::stream::BoxStream; -use futures::{ready, FutureExt as _, Stream, StreamExt as _}; +use futures::{FutureExt as _, Stream, StreamExt as _, ready}; /// A stream that iterates record batch by record batch, file over file. pub struct FileStream { @@ -162,12 +162,11 @@ impl FileStream { }, FileStreamState::Scan { reader, next } => { // We need to poll the next `FileOpenFuture` here to drive it forward - if let Some(next_open_future) = next { - if let NextOpen::Pending(f) = next_open_future { - if let Poll::Ready(reader) = f.as_mut().poll(cx) { - *next_open_future = NextOpen::Ready(reader); - } - } + if let Some(next_open_future) = next + && let NextOpen::Pending(f) = next_open_future + && let Poll::Ready(reader) = f.as_mut().poll(cx) + { + *next_open_future = NextOpen::Ready(reader); } match ready!(reader.poll_next_unpin(cx)) { Some(Ok(batch)) => { @@ -250,7 +249,7 @@ impl FileStream { } } FileStreamState::Error | FileStreamState::Limit => { - return Poll::Ready(None) + return Poll::Ready(None); } } } @@ -447,15 +446,15 @@ impl FileStreamMetrics { #[cfg(test)] mod tests { + use crate::PartitionedFile; use crate::file_scan_config::FileScanConfigBuilder; use crate::tests::make_partition; - use crate::PartitionedFile; use datafusion_common::error::Result; use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet; use futures::{FutureExt as _, StreamExt as _}; - use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; + use std::sync::atomic::{AtomicUsize, Ordering}; use crate::file_stream::{FileOpenFuture, FileOpener, FileStream, OnError}; use crate::test_util::MockSource; diff --git a/datafusion/datasource/src/memory.rs b/datafusion/datasource/src/memory.rs index e499c483db22..e0635435e9d0 100644 --- a/datafusion/datasource/src/memory.rs +++ b/datafusion/datasource/src/memory.rs @@ -30,7 +30,7 @@ use crate::source::{DataSource, DataSourceExec}; use arrow::array::{RecordBatch, RecordBatchOptions}; use arrow::datatypes::{Schema, SchemaRef}; use datafusion_common::{ - assert_or_internal_err, plan_err, project_schema, Result, ScalarValue, + Result, ScalarValue, assert_or_internal_err, plan_err, project_schema, }; use datafusion_execution::TaskContext; use datafusion_physical_expr::equivalence::project_orderings; @@ -42,8 +42,8 @@ use datafusion_physical_plan::projection::{ all_alias_free_columns, new_projections_for_columns, }; use datafusion_physical_plan::{ - common, ColumnarValue, DisplayAs, DisplayFormatType, Partitioning, PhysicalExpr, - SendableRecordBatchStream, Statistics, + ColumnarValue, DisplayAs, DisplayFormatType, Partitioning, PhysicalExpr, + SendableRecordBatchStream, Statistics, common, }; use async_trait::async_trait; @@ -120,10 +120,10 @@ impl DataSource for MemorySourceConfig { .map_or(String::new(), |limit| format!(", fetch={limit}")); if self.show_sizes { write!( - f, - "partitions={}, partition_sizes={partition_sizes:?}{limit}{output_ordering}{constraints}", - partition_sizes.len(), - ) + f, + "partitions={}, partition_sizes={partition_sizes:?}{limit}{output_ordering}{constraints}", + partition_sizes.len(), + ) } else { write!( f, @@ -1083,8 +1083,7 @@ mod tests { let actual = partitioned_datasrc .map(|datasrc| datasrc.output_partitioning().partition_count()); assert_eq!( - actual, - partition_cnt, + actual, partition_cnt, "partitioned datasrc does not match expected, we expected {should_exist}, instead found {actual:?}" ); } @@ -1270,8 +1269,8 @@ mod tests { } #[test] - fn test_repartition_no_sort_information_no_output_ordering_lopsized_batches( - ) -> Result<()> { + fn test_repartition_no_sort_information_no_output_ordering_lopsized_batches() + -> Result<()> { let no_sort = vec![]; let no_output_ordering = None; diff --git a/datafusion/datasource/src/mod.rs b/datafusion/datasource/src/mod.rs index d02166b92d44..85f3418d1ea9 100644 --- a/datafusion/datasource/src/mod.rs +++ b/datafusion/datasource/src/mod.rs @@ -56,11 +56,11 @@ pub use self::url::ListingTableUrl; use crate::file_groups::FileGroup; use chrono::TimeZone; use datafusion_common::stats::Precision; -use datafusion_common::{exec_datafusion_err, ColumnStatistics, Result}; +use datafusion_common::{ColumnStatistics, Result, exec_datafusion_err}; use datafusion_common::{ScalarValue, Statistics}; use futures::{Stream, StreamExt}; -use object_store::{path::Path, ObjectMeta}; use object_store::{GetOptions, GetRange, ObjectStore}; +use object_store::{ObjectMeta, path::Path}; pub use table_schema::TableSchema; // Remove when add_row_stats is remove #[expect(deprecated)] @@ -578,12 +578,13 @@ mod tests { // as per documentation, when `ignore_subdirectory` is true, we should ignore files that aren't // a direct child of the `url` - assert!(url - .contains( + assert!( + url.contains( &Path::parse("/var/data/mytable/mysubfolder/data.parquet").unwrap(), true ) - .not()); + .not() + ); // when we set `ignore_subdirectory` to false, we should not ignore the file assert!(url.contains( diff --git a/datafusion/datasource/src/projection.rs b/datafusion/datasource/src/projection.rs index 9e3139f4fbd3..9a0cb494e495 100644 --- a/datafusion/datasource/src/projection.rs +++ b/datafusion/datasource/src/projection.rs @@ -19,8 +19,8 @@ use std::sync::Arc; use arrow::datatypes::{Schema, SchemaRef}; use datafusion_common::{ - tree_node::{Transformed, TransformedResult, TreeNode}, Result, ScalarValue, + tree_node::{Transformed, TransformedResult, TreeNode}, }; use datafusion_physical_expr::{ expressions::{Column, Literal}, @@ -30,8 +30,8 @@ use futures::{FutureExt, StreamExt}; use itertools::Itertools; use crate::{ - file_stream::{FileOpenFuture, FileOpener}, PartitionedFile, TableSchema, + file_stream::{FileOpenFuture, FileOpener}, }; /// A file opener that handles applying a projection on top of an inner opener. @@ -250,11 +250,10 @@ impl SplitProjection { let expr = Arc::clone(&proj_expr.expr) .transform(|expr| { let original_expr = Arc::clone(&expr); - if let Some(column) = expr.as_any().downcast_ref::() { - if let Some(new_column) = column_mapping.get(&column.index()) - { - return Ok(Transformed::yes(Arc::clone(new_column))); - } + if let Some(column) = expr.as_any().downcast_ref::() + && let Some(new_column) = column_mapping.get(&column.index()) + { + return Ok(Transformed::yes(Arc::clone(new_column))); } Ok(Transformed::no(original_expr)) }) @@ -290,8 +289,8 @@ mod test { use arrow::array::AsArray; use arrow::datatypes::{DataType, SchemaRef}; - use datafusion_common::{record_batch, DFSchema, ScalarValue}; - use datafusion_expr::{col, execution_props::ExecutionProps, Expr}; + use datafusion_common::{DFSchema, ScalarValue, record_batch}; + use datafusion_expr::{Expr, col, execution_props::ExecutionProps}; use datafusion_physical_expr::{create_physical_exprs, projection::ProjectionExpr}; use itertools::Itertools; diff --git a/datafusion/datasource/src/schema_adapter.rs b/datafusion/datasource/src/schema_adapter.rs index 4c7b37113d58..e132fbcd8ff7 100644 --- a/datafusion/datasource/src/schema_adapter.rs +++ b/datafusion/datasource/src/schema_adapter.rs @@ -21,14 +21,15 @@ //! physical format into how they should be used by DataFusion. For instance, a schema //! can be stored external to a parquet file that maps parquet logical types to arrow types. use arrow::{ - array::{new_null_array, ArrayRef, RecordBatch, RecordBatchOptions}, + array::{ArrayRef, RecordBatch, RecordBatchOptions, new_null_array}, compute::can_cast_types, datatypes::{DataType, Field, Schema, SchemaRef}, }; use datafusion_common::{ + ColumnStatistics, format::DEFAULT_CAST_OPTIONS, nested_struct::{cast_column, validate_struct_compatibility}, - plan_err, ColumnStatistics, + plan_err, }; use std::{fmt::Debug, sync::Arc}; /// Function used by [`SchemaMapping`] to adapt a column from the file schema to @@ -343,11 +344,10 @@ where for (file_idx, file_field) in file_schema.fields.iter().enumerate() { if let Some((table_idx, table_field)) = projected_table_schema.fields().find(file_field.name()) + && can_map_field(file_field, table_field)? { - if can_map_field(file_field, table_field)? { - field_mappings[table_idx] = Some(projection.len()); - projection.push(file_idx); - } + field_mappings[table_idx] = Some(projection.len()); + projection.push(file_idx); } } @@ -487,7 +487,7 @@ mod tests { datatypes::{DataType, Field, TimeUnit}, record_batch::RecordBatch, }; - use datafusion_common::{stats::Precision, Result, ScalarValue, Statistics}; + use datafusion_common::{Result, ScalarValue, Statistics, stats::Precision}; #[test] fn test_schema_mapping_map_statistics_basic() { diff --git a/datafusion/datasource/src/sink.rs b/datafusion/datasource/src/sink.rs index f66fbc408c68..5460a0ffdc3d 100644 --- a/datafusion/datasource/src/sink.rs +++ b/datafusion/datasource/src/sink.rs @@ -24,15 +24,15 @@ use std::sync::Arc; use arrow::array::{ArrayRef, RecordBatch, UInt64Array}; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; -use datafusion_common::{assert_eq_or_internal_err, Result}; +use datafusion_common::{Result, assert_eq_or_internal_err}; use datafusion_execution::TaskContext; use datafusion_physical_expr::{Distribution, EquivalenceProperties}; use datafusion_physical_expr_common::sort_expr::{LexRequirement, OrderingRequirements}; use datafusion_physical_plan::metrics::MetricsSet; use datafusion_physical_plan::stream::RecordBatchStreamAdapter; use datafusion_physical_plan::{ - execute_input_stream, DisplayAs, DisplayFormatType, ExecutionPlan, - ExecutionPlanProperties, Partitioning, PlanProperties, SendableRecordBatchStream, + DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, Partitioning, + PlanProperties, SendableRecordBatchStream, execute_input_stream, }; use async_trait::async_trait; diff --git a/datafusion/datasource/src/statistics.rs b/datafusion/datasource/src/statistics.rs index 17a677552687..09add904b51a 100644 --- a/datafusion/datasource/src/statistics.rs +++ b/datafusion/datasource/src/statistics.rs @@ -22,8 +22,8 @@ use std::sync::Arc; -use crate::file_groups::FileGroup; use crate::PartitionedFile; +use crate::file_groups::FileGroup; use arrow::array::RecordBatch; use arrow::compute::SortColumn; @@ -31,7 +31,7 @@ use arrow::datatypes::SchemaRef; use arrow::row::{Row, Rows}; use datafusion_common::stats::Precision; use datafusion_common::{ - plan_datafusion_err, plan_err, DataFusionError, Result, ScalarValue, + DataFusionError, Result, ScalarValue, plan_datafusion_err, plan_err, }; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; diff --git a/datafusion/datasource/src/test_util.rs b/datafusion/datasource/src/test_util.rs index 6806cd73996f..a7eb7fd1c495 100644 --- a/datafusion/datasource/src/test_util.rs +++ b/datafusion/datasource/src/test_util.rs @@ -24,7 +24,7 @@ use std::sync::Arc; use arrow::datatypes::Schema; use datafusion_common::Result; -use datafusion_physical_expr::{expressions::Column, PhysicalExpr}; +use datafusion_physical_expr::{PhysicalExpr, expressions::Column}; use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet; use object_store::ObjectStore; diff --git a/datafusion/datasource/src/url.rs b/datafusion/datasource/src/url.rs index 1307a4c8b1eb..4641df671e44 100644 --- a/datafusion/datasource/src/url.rs +++ b/datafusion/datasource/src/url.rs @@ -26,8 +26,8 @@ use futures::{StreamExt, TryStreamExt}; use glob::Pattern; use itertools::Itertools; use log::debug; -use object_store::path::Path; use object_store::path::DELIMITER; +use object_store::path::Path; use object_store::{ObjectMeta, ObjectStore}; use url::Url; @@ -209,12 +209,12 @@ impl ListingTableUrl { /// assert_eq!(url.file_extension(), None); /// ``` pub fn file_extension(&self) -> Option<&str> { - if let Some(mut segments) = self.url.path_segments() { - if let Some(last_segment) = segments.next_back() { - if last_segment.contains(".") && !last_segment.ends_with(".") { - return last_segment.split('.').next_back(); - } - } + if let Some(mut segments) = self.url.path_segments() + && let Some(last_segment) = segments.next_back() + && last_segment.contains(".") + && !last_segment.ends_with(".") + { + return last_segment.split('.').next_back(); } None @@ -430,11 +430,11 @@ mod tests { use super::*; use async_trait::async_trait; use bytes::Bytes; - use datafusion_common::config::TableOptions; use datafusion_common::DFSchema; + use datafusion_common::config::TableOptions; + use datafusion_execution::TaskContext; use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::RuntimeEnv; - use datafusion_execution::TaskContext; use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::{AggregateUDF, Expr, LogicalPlan, ScalarUDF, WindowUDF}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; diff --git a/datafusion/datasource/src/write/demux.rs b/datafusion/datasource/src/write/demux.rs index 3fe6149b58b2..5e4962aa48b1 100644 --- a/datafusion/datasource/src/write/demux.rs +++ b/datafusion/datasource/src/write/demux.rs @@ -28,15 +28,15 @@ use datafusion_common::error::Result; use datafusion_physical_plan::SendableRecordBatchStream; use arrow::array::{ - builder::UInt64Builder, cast::AsArray, downcast_dictionary_array, ArrayAccessor, - RecordBatch, StringArray, StructArray, + ArrayAccessor, RecordBatch, StringArray, StructArray, builder::UInt64Builder, + cast::AsArray, downcast_dictionary_array, }; use arrow::datatypes::{DataType, Schema}; use datafusion_common::cast::{ as_boolean_array, as_date32_array, as_date64_array, as_float16_array, - as_float32_array, as_float64_array, as_int16_array, as_int32_array, as_int64_array, - as_int8_array, as_string_array, as_string_view_array, as_uint16_array, - as_uint32_array, as_uint64_array, as_uint8_array, + as_float32_array, as_float64_array, as_int8_array, as_int16_array, as_int32_array, + as_int64_array, as_string_array, as_string_view_array, as_uint8_array, + as_uint16_array, as_uint32_array, as_uint64_array, }; use datafusion_common::{exec_datafusion_err, internal_datafusion_err, not_impl_err}; use datafusion_common_runtime::SpawnedTask; @@ -502,9 +502,9 @@ fn compute_partition_keys_by_row<'a>( } _ => { return not_impl_err!( - "it is not yet supported to write to hive partitions with datatype {}", - dtype - ) + "it is not yet supported to write to hive partitions with datatype {}", + dtype + ); } } diff --git a/datafusion/datasource/src/write/mod.rs b/datafusion/datasource/src/write/mod.rs index 85832f81bc18..ca65477c498c 100644 --- a/datafusion/datasource/src/write/mod.rs +++ b/datafusion/datasource/src/write/mod.rs @@ -28,9 +28,9 @@ use datafusion_common::error::Result; use arrow::array::RecordBatch; use arrow::datatypes::Schema; use bytes::Bytes; +use object_store::ObjectStore; use object_store::buffered::BufWriter; use object_store::path::Path; -use object_store::ObjectStore; use tokio::io::AsyncWrite; pub mod demux; diff --git a/datafusion/datasource/src/write/orchestration.rs b/datafusion/datasource/src/write/orchestration.rs index ab836b7b7f38..1672817de015 100644 --- a/datafusion/datasource/src/write/orchestration.rs +++ b/datafusion/datasource/src/write/orchestration.rs @@ -28,7 +28,7 @@ use datafusion_common::error::Result; use arrow::array::RecordBatch; use datafusion_common::{ - exec_datafusion_err, internal_datafusion_err, internal_err, DataFusionError, + DataFusionError, exec_datafusion_err, internal_datafusion_err, internal_err, }; use datafusion_common_runtime::{JoinSet, SpawnedTask}; use datafusion_execution::TaskContext; @@ -120,7 +120,7 @@ pub(crate) async fn serialize_rb_stream_to_object_store( return SerializedRecordBatchResult::failure( None, exec_datafusion_err!("Error writing to object store: {e}"), - ) + ); } }; row_count += cnt; @@ -148,7 +148,7 @@ pub(crate) async fn serialize_rb_stream_to_object_store( return SerializedRecordBatchResult::failure( Some(writer), internal_datafusion_err!("Unknown error writing to object store"), - ) + ); } } SerializedRecordBatchResult::success(writer, row_count) @@ -216,12 +216,20 @@ pub(crate) async fn stateless_serialize_and_write_files( } if any_errors { - match any_abort_errors{ - true => return internal_err!("Error encountered during writing to ObjectStore and failed to abort all writers. Partial result may have been written."), + match any_abort_errors { + true => { + return internal_err!( + "Error encountered during writing to ObjectStore and failed to abort all writers. Partial result may have been written." + ); + } false => match triggering_error { Some(e) => return Err(e), - None => return internal_err!("Unknown Error encountered during writing to ObjectStore. All writers successfully aborted.") - } + None => { + return internal_err!( + "Unknown Error encountered during writing to ObjectStore. All writers successfully aborted." + ); + } + }, } } diff --git a/datafusion/functions-nested/Cargo.toml b/datafusion/functions-nested/Cargo.toml index 6b0241a10a54..52cddadd0af5 100644 --- a/datafusion/functions-nested/Cargo.toml +++ b/datafusion/functions-nested/Cargo.toml @@ -21,7 +21,7 @@ description = "Nested Type Function packages for the DataFusion query engine" keywords = ["datafusion", "logical", "plan", "expressions"] readme = "README.md" version = { workspace = true } -edition = { workspace = true } +edition = "2024" homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } diff --git a/datafusion/functions-nested/benches/array_slice.rs b/datafusion/functions-nested/benches/array_slice.rs index bdbbeb837ee5..858e43899619 100644 --- a/datafusion/functions-nested/benches/array_slice.rs +++ b/datafusion/functions-nested/benches/array_slice.rs @@ -22,9 +22,9 @@ use arrow::array::{ }; use arrow::buffer::{OffsetBuffer, ScalarBuffer}; use arrow::datatypes::{DataType, Field, Int64Type}; -use criterion::{criterion_group, criterion_main, Criterion}; -use datafusion_common::config::ConfigOptions; +use criterion::{Criterion, criterion_group, criterion_main}; use datafusion_common::ScalarValue; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions_nested::extract::array_slice_udf; use rand::rngs::StdRng; diff --git a/datafusion/functions-nested/benches/map.rs b/datafusion/functions-nested/benches/map.rs index 3197cc55cc95..75b4045a193d 100644 --- a/datafusion/functions-nested/benches/map.rs +++ b/datafusion/functions-nested/benches/map.rs @@ -20,15 +20,15 @@ extern crate criterion; use arrow::array::{Int32Array, ListArray, StringArray}; use arrow::buffer::{OffsetBuffer, ScalarBuffer}; use arrow::datatypes::{DataType, Field}; -use criterion::{criterion_group, criterion_main, Criterion}; -use datafusion_common::config::ConfigOptions; +use criterion::{Criterion, criterion_group, criterion_main}; use datafusion_common::ScalarValue; +use datafusion_common::config::ConfigOptions; use datafusion_expr::planner::ExprPlanner; use datafusion_expr::{ColumnarValue, Expr, ScalarFunctionArgs}; use datafusion_functions_nested::map::map_udf; use datafusion_functions_nested::planner::NestedFunctionPlanner; -use rand::prelude::ThreadRng; use rand::Rng; +use rand::prelude::ThreadRng; use std::collections::HashSet; use std::hint::black_box; use std::sync::Arc; diff --git a/datafusion/functions-nested/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs index 8ae8c42b79d5..54b94abafb99 100644 --- a/datafusion/functions-nested/src/array_has.rs +++ b/datafusion/functions-nested/src/array_has.rs @@ -24,11 +24,11 @@ use arrow::row::{RowConverter, Rows, SortField}; use datafusion_common::cast::{as_fixed_size_list_array, as_generic_list_array}; use datafusion_common::utils::string_utils::string_array_to_vec; use datafusion_common::utils::take_function_args; -use datafusion_common::{exec_err, DataFusionError, Result, ScalarValue}; +use datafusion_common::{DataFusionError, Result, ScalarValue, exec_err}; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::simplify::ExprSimplifyResult; use datafusion_expr::{ - in_list, ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility, + ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility, in_list, }; use datafusion_macros::user_doc; use datafusion_physical_expr_common::datum::compare_with_eq; @@ -136,7 +136,7 @@ impl ScalarUDFImpl for ArrayHas { return Ok(ExprSimplifyResult::Simplified(Expr::Literal( ScalarValue::Boolean(None), None, - ))) + ))); } Expr::Literal( // FixedSizeList gets coerced to List @@ -366,11 +366,11 @@ fn array_has_dispatch_for_scalar( let length = end - start; // Check if the array at this position is null - if let Some(validity_buffer) = validity { - if !validity_buffer.is_valid(i) { - final_contained[i] = None; // null array -> null result - continue; - } + if let Some(validity_buffer) = validity + && !validity_buffer.is_valid(i) + { + final_contained[i] = None; // null array -> null result + continue; } // For non-null arrays: length is 0 for empty arrays @@ -675,17 +675,17 @@ mod tests { use arrow::datatypes::Int32Type; use arrow::{ - array::{create_array, Array, ArrayRef, AsArray, Int32Array, ListArray}, + array::{Array, ArrayRef, AsArray, Int32Array, ListArray, create_array}, buffer::OffsetBuffer, datatypes::{DataType, Field}, }; use datafusion_common::{ - config::ConfigOptions, utils::SingleRowListArrayBuilder, DataFusionError, - ScalarValue, + DataFusionError, ScalarValue, config::ConfigOptions, + utils::SingleRowListArrayBuilder, }; use datafusion_expr::{ - col, execution_props::ExecutionProps, lit, simplify::ExprSimplifyResult, - ColumnarValue, Expr, ScalarFunctionArgs, ScalarUDFImpl, + ColumnarValue, Expr, ScalarFunctionArgs, ScalarUDFImpl, col, + execution_props::ExecutionProps, lit, simplify::ExprSimplifyResult, }; use crate::expr_fn::make_array; diff --git a/datafusion/functions-nested/src/cardinality.rs b/datafusion/functions-nested/src/cardinality.rs index 58a83feb6676..c467686b865c 100644 --- a/datafusion/functions-nested/src/cardinality.rs +++ b/datafusion/functions-nested/src/cardinality.rs @@ -25,10 +25,10 @@ use arrow::datatypes::{ DataType, DataType::{LargeList, List, Map, Null, UInt64}, }; +use datafusion_common::Result; use datafusion_common::cast::{as_large_list_array, as_list_array, as_map_array}; use datafusion_common::exec_err; -use datafusion_common::utils::{take_function_args, ListCoercion}; -use datafusion_common::Result; +use datafusion_common::utils::{ListCoercion, take_function_args}; use datafusion_expr::{ ArrayFunctionArgument, ArrayFunctionSignature, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature, Volatility, diff --git a/datafusion/functions-nested/src/concat.rs b/datafusion/functions-nested/src/concat.rs index a565006a2577..0a7402060a0e 100644 --- a/datafusion/functions-nested/src/concat.rs +++ b/datafusion/functions-nested/src/concat.rs @@ -28,10 +28,10 @@ use arrow::array::{ }; use arrow::buffer::OffsetBuffer; use arrow::datatypes::{DataType, Field}; +use datafusion_common::Result; use datafusion_common::utils::{ - base_type, coerced_type_with_base_type_only, ListCoercion, + ListCoercion, base_type, coerced_type_with_base_type_only, }; -use datafusion_common::Result; use datafusion_common::{ cast::as_generic_list_array, exec_err, plan_err, @@ -297,7 +297,7 @@ impl ScalarUDFImpl for ArrayConcat { DataType::Null | DataType::List(_) | DataType::FixedSizeList(..) => (), DataType::LargeList(_) => large_list = true, arg_type => { - return plan_err!("{} does not support type {arg_type}", self.name()) + return plan_err!("{} does not support type {arg_type}", self.name()); } } diff --git a/datafusion/functions-nested/src/dimension.rs b/datafusion/functions-nested/src/dimension.rs index d0fa294fe42d..93fd57afd81c 100644 --- a/datafusion/functions-nested/src/dimension.rs +++ b/datafusion/functions-nested/src/dimension.rs @@ -28,7 +28,7 @@ use std::any::Any; use datafusion_common::cast::{ as_fixed_size_list_array, as_large_list_array, as_list_array, }; -use datafusion_common::{exec_err, utils::take_function_args, Result}; +use datafusion_common::{Result, exec_err, utils::take_function_args}; use crate::utils::{compute_array_dims, make_scalar_function}; use datafusion_common::utils::list_ndims; diff --git a/datafusion/functions-nested/src/distance.rs b/datafusion/functions-nested/src/distance.rs index dc8eaa699f87..f9c42c414423 100644 --- a/datafusion/functions-nested/src/distance.rs +++ b/datafusion/functions-nested/src/distance.rs @@ -29,8 +29,8 @@ use datafusion_common::cast::{ as_float32_array, as_float64_array, as_generic_list_array, as_int32_array, as_int64_array, }; -use datafusion_common::utils::{coerced_type_with_base_type_only, ListCoercion}; -use datafusion_common::{exec_err, plan_err, utils::take_function_args, Result}; +use datafusion_common::utils::{ListCoercion, coerced_type_with_base_type_only}; +use datafusion_common::{Result, exec_err, plan_err, utils::take_function_args}; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; diff --git a/datafusion/functions-nested/src/empty.rs b/datafusion/functions-nested/src/empty.rs index 3f9077575205..ca16e494b12d 100644 --- a/datafusion/functions-nested/src/empty.rs +++ b/datafusion/functions-nested/src/empty.rs @@ -25,7 +25,7 @@ use arrow::datatypes::{ DataType::{Boolean, FixedSizeList, LargeList, List}, }; use datafusion_common::cast::as_generic_list_array; -use datafusion_common::{exec_err, utils::take_function_args, Result}; +use datafusion_common::{Result, exec_err, utils::take_function_args}; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; diff --git a/datafusion/functions-nested/src/except.rs b/datafusion/functions-nested/src/except.rs index 8b6bcaa0620c..a8ac997ce33e 100644 --- a/datafusion/functions-nested/src/except.rs +++ b/datafusion/functions-nested/src/except.rs @@ -18,12 +18,12 @@ //! [`ScalarUDFImpl`] definitions for array_except function. use crate::utils::{check_datatypes, make_scalar_function}; -use arrow::array::{cast::AsArray, Array, ArrayRef, GenericListArray, OffsetSizeTrait}; +use arrow::array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait, cast::AsArray}; use arrow::buffer::OffsetBuffer; use arrow::datatypes::{DataType, FieldRef}; use arrow::row::{RowConverter, SortField}; -use datafusion_common::utils::{take_function_args, ListCoercion}; -use datafusion_common::{internal_err, HashSet, Result}; +use datafusion_common::utils::{ListCoercion, take_function_args}; +use datafusion_common::{HashSet, Result, internal_err}; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; diff --git a/datafusion/functions-nested/src/extract.rs b/datafusion/functions-nested/src/extract.rs index 57505c59493a..0f7246c8589c 100644 --- a/datafusion/functions-nested/src/extract.rs +++ b/datafusion/functions-nested/src/extract.rs @@ -35,8 +35,8 @@ use datafusion_common::cast::{ use datafusion_common::internal_err; use datafusion_common::utils::ListCoercion; use datafusion_common::{ - exec_datafusion_err, exec_err, internal_datafusion_err, plan_err, - utils::take_function_args, Result, + Result, exec_datafusion_err, exec_err, internal_datafusion_err, plan_err, + utils::take_function_args, }; use datafusion_expr::{ ArrayFunctionArgument, ArrayFunctionSignature, Expr, TypeSignature, @@ -1034,9 +1034,9 @@ impl ScalarUDFImpl for ArrayAnyValue { } fn return_type(&self, arg_types: &[DataType]) -> Result { match &arg_types[0] { - List(field) - | LargeList(field) - | FixedSizeList(field, _) => Ok(field.data_type().clone()), + List(field) | LargeList(field) | FixedSizeList(field, _) => { + Ok(field.data_type().clone()) + } _ => plan_err!( "array_any_value can only accept List, LargeList or FixedSizeList as the argument" ), @@ -1129,8 +1129,8 @@ where mod tests { use super::{array_element_udf, general_list_view_array_slice}; use arrow::array::{ - cast::AsArray, Array, ArrayRef, GenericListViewArray, Int32Array, Int64Array, - ListViewArray, + Array, ArrayRef, GenericListViewArray, Int32Array, Int64Array, ListViewArray, + cast::AsArray, }; use arrow::buffer::ScalarBuffer; use arrow::datatypes::{DataType, Field}; diff --git a/datafusion/functions-nested/src/flatten.rs b/datafusion/functions-nested/src/flatten.rs index 76c4714de1af..33b3e102ae0b 100644 --- a/datafusion/functions-nested/src/flatten.rs +++ b/datafusion/functions-nested/src/flatten.rs @@ -25,7 +25,7 @@ use arrow::datatypes::{ DataType::{FixedSizeList, LargeList, List, Null}, }; use datafusion_common::cast::{as_large_list_array, as_list_array}; -use datafusion_common::{exec_err, utils::take_function_args, Result}; +use datafusion_common::{Result, exec_err, utils::take_function_args}; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; diff --git a/datafusion/functions-nested/src/length.rs b/datafusion/functions-nested/src/length.rs index ceceee7bfa52..a9a2731c3b32 100644 --- a/datafusion/functions-nested/src/length.rs +++ b/datafusion/functions-nested/src/length.rs @@ -29,7 +29,7 @@ use arrow::datatypes::{ use datafusion_common::cast::{ as_fixed_size_list_array, as_generic_list_array, as_int64_array, }; -use datafusion_common::{exec_err, Result}; +use datafusion_common::{Result, exec_err}; use datafusion_expr::{ ArrayFunctionArgument, ArrayFunctionSignature, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature, Volatility, diff --git a/datafusion/functions-nested/src/make_array.rs b/datafusion/functions-nested/src/make_array.rs index 6ca13416504a..410a545853ac 100644 --- a/datafusion/functions-nested/src/make_array.rs +++ b/datafusion/functions-nested/src/make_array.rs @@ -23,18 +23,18 @@ use std::vec; use crate::utils::make_scalar_function; use arrow::array::{ - new_null_array, Array, ArrayData, ArrayRef, Capacities, GenericListArray, - MutableArrayData, NullArray, OffsetSizeTrait, + Array, ArrayData, ArrayRef, Capacities, GenericListArray, MutableArrayData, + NullArray, OffsetSizeTrait, new_null_array, }; use arrow::buffer::OffsetBuffer; use arrow::datatypes::DataType; use arrow::datatypes::{DataType::Null, Field}; use datafusion_common::utils::SingleRowListArrayBuilder; -use datafusion_common::{plan_err, Result}; +use datafusion_common::{Result, plan_err}; +use datafusion_expr::TypeSignature; use datafusion_expr::binary::{ try_type_union_resolution_with_struct, type_union_resolution, }; -use datafusion_expr::TypeSignature; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; diff --git a/datafusion/functions-nested/src/map.rs b/datafusion/functions-nested/src/map.rs index fe9bc609c013..a96bbc0589e3 100644 --- a/datafusion/functions-nested/src/map.rs +++ b/datafusion/functions-nested/src/map.rs @@ -25,7 +25,7 @@ use arrow::datatypes::{DataType, Field, SchemaBuilder, ToByteSlice}; use datafusion_common::utils::{fixed_size_list_to_arrays, list_to_arrays}; use datafusion_common::{ - exec_err, utils::take_function_args, HashSet, Result, ScalarValue, + HashSet, Result, ScalarValue, exec_err, utils::take_function_args, }; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::{ diff --git a/datafusion/functions-nested/src/map_entries.rs b/datafusion/functions-nested/src/map_entries.rs index 7d9d103206db..571b5aacafa0 100644 --- a/datafusion/functions-nested/src/map_entries.rs +++ b/datafusion/functions-nested/src/map_entries.rs @@ -21,7 +21,7 @@ use crate::utils::{get_map_entry_field, make_scalar_function}; use arrow::array::{Array, ArrayRef, ListArray}; use arrow::datatypes::{DataType, Field, Fields}; use datafusion_common::utils::take_function_args; -use datafusion_common::{cast::as_map_array, exec_err, Result}; +use datafusion_common::{Result, cast::as_map_array, exec_err}; use datafusion_expr::{ ArrayFunctionSignature, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature, Volatility, diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs index 4aab5d7a60d1..3d22d97c45b7 100644 --- a/datafusion/functions-nested/src/map_extract.rs +++ b/datafusion/functions-nested/src/map_extract.rs @@ -19,12 +19,12 @@ use crate::utils::{get_map_entry_field, make_scalar_function}; use arrow::array::{ - make_array, Array, ArrayRef, Capacities, ListArray, MapArray, MutableArrayData, + Array, ArrayRef, Capacities, ListArray, MapArray, MutableArrayData, make_array, }; use arrow::buffer::OffsetBuffer; use arrow::datatypes::{DataType, Field}; use datafusion_common::utils::take_function_args; -use datafusion_common::{cast::as_map_array, exec_err, Result}; +use datafusion_common::{Result, cast::as_map_array, exec_err}; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; diff --git a/datafusion/functions-nested/src/map_keys.rs b/datafusion/functions-nested/src/map_keys.rs index 2fc44670d74a..c60334086329 100644 --- a/datafusion/functions-nested/src/map_keys.rs +++ b/datafusion/functions-nested/src/map_keys.rs @@ -21,7 +21,7 @@ use crate::utils::{get_map_entry_field, make_scalar_function}; use arrow::array::{Array, ArrayRef, ListArray}; use arrow::datatypes::{DataType, Field}; use datafusion_common::utils::take_function_args; -use datafusion_common::{cast::as_map_array, exec_err, Result}; +use datafusion_common::{Result, cast::as_map_array, exec_err}; use datafusion_expr::{ ArrayFunctionSignature, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature, Volatility, diff --git a/datafusion/functions-nested/src/map_values.rs b/datafusion/functions-nested/src/map_values.rs index 6ae8a278063d..4cfb23d3084a 100644 --- a/datafusion/functions-nested/src/map_values.rs +++ b/datafusion/functions-nested/src/map_values.rs @@ -21,7 +21,7 @@ use crate::utils::{get_map_entry_field, make_scalar_function}; use arrow::array::{Array, ArrayRef, ListArray}; use arrow::datatypes::{DataType, Field, FieldRef}; use datafusion_common::utils::take_function_args; -use datafusion_common::{cast::as_map_array, exec_err, internal_err, Result}; +use datafusion_common::{Result, cast::as_map_array, exec_err, internal_err}; use datafusion_expr::{ ArrayFunctionSignature, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature, Volatility, diff --git a/datafusion/functions-nested/src/min_max.rs b/datafusion/functions-nested/src/min_max.rs index 1f3623ca243d..e3603b731fd8 100644 --- a/datafusion/functions-nested/src/min_max.rs +++ b/datafusion/functions-nested/src/min_max.rs @@ -20,10 +20,10 @@ use crate::utils::make_scalar_function; use arrow::array::{ArrayRef, GenericListArray, OffsetSizeTrait}; use arrow::datatypes::DataType; use arrow::datatypes::DataType::{LargeList, List}; +use datafusion_common::Result; use datafusion_common::cast::{as_large_list_array, as_list_array}; use datafusion_common::utils::take_function_args; -use datafusion_common::Result; -use datafusion_common::{exec_err, plan_err, ScalarValue}; +use datafusion_common::{ScalarValue, exec_err, plan_err}; use datafusion_doc::Documentation; use datafusion_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, diff --git a/datafusion/functions-nested/src/planner.rs b/datafusion/functions-nested/src/planner.rs index 4fec5e38065b..22b9074c8af4 100644 --- a/datafusion/functions-nested/src/planner.rs +++ b/datafusion/functions-nested/src/planner.rs @@ -18,15 +18,15 @@ //! SQL planning extensions like [`NestedFunctionPlanner`] and [`FieldAccessPlanner`] use arrow::datatypes::DataType; -use datafusion_common::{plan_err, utils::list_ndims, DFSchema, Result}; +use datafusion_common::{DFSchema, Result, plan_err, utils::list_ndims}; +use datafusion_expr::AggregateUDF; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::expr::{AggregateFunction, AggregateFunctionParams}; #[cfg(feature = "sql")] use datafusion_expr::sqlparser::ast::BinaryOperator; -use datafusion_expr::AggregateUDF; use datafusion_expr::{ - planner::{ExprPlanner, PlannerResult, RawBinaryExpr, RawFieldAccessExpr}, Expr, ExprSchemable, GetFieldAccess, + planner::{ExprPlanner, PlannerResult, RawBinaryExpr, RawFieldAccessExpr}, }; #[cfg(not(feature = "sql"))] use datafusion_expr_common::operator::Operator as BinaryOperator; diff --git a/datafusion/functions-nested/src/position.rs b/datafusion/functions-nested/src/position.rs index 2844eefaf058..d085fa29cc7e 100644 --- a/datafusion/functions-nested/src/position.rs +++ b/datafusion/functions-nested/src/position.rs @@ -31,14 +31,14 @@ use std::any::Any; use std::sync::Arc; use arrow::array::{ - types::UInt64Type, Array, ArrayRef, GenericListArray, ListArray, OffsetSizeTrait, - UInt64Array, + Array, ArrayRef, GenericListArray, ListArray, OffsetSizeTrait, UInt64Array, + types::UInt64Type, }; use datafusion_common::cast::{ as_generic_list_array, as_int64_array, as_large_list_array, as_list_array, }; use datafusion_common::{ - assert_or_internal_err, exec_err, utils::take_function_args, Result, + Result, assert_or_internal_err, exec_err, utils::take_function_args, }; use itertools::Itertools; diff --git a/datafusion/functions-nested/src/range.rs b/datafusion/functions-nested/src/range.rs index e570ecf97420..0e32f51edd7d 100644 --- a/datafusion/functions-nested/src/range.rs +++ b/datafusion/functions-nested/src/range.rs @@ -23,28 +23,28 @@ use arrow::datatypes::TimeUnit; use arrow::datatypes::{DataType, Field, IntervalUnit::MonthDayNano}; use arrow::{ array::{ + Array, ArrayRef, Int64Array, ListArray, ListBuilder, NullBufferBuilder, builder::{Date32Builder, TimestampNanosecondBuilder}, temporal_conversions::as_datetime_with_timezone, timezone::Tz, types::{Date32Type, IntervalMonthDayNanoType, TimestampNanosecondType}, - Array, ArrayRef, Int64Array, ListArray, ListBuilder, NullBufferBuilder, }, compute::cast, }; use datafusion_common::internal_err; use datafusion_common::{ + Result, exec_datafusion_err, exec_err, not_impl_datafusion_err, + utils::take_function_args, +}; +use datafusion_common::{ + ScalarValue, cast::{ as_date32_array, as_int64_array, as_interval_mdn_array, as_timestamp_nanosecond_array, }, types::{ - logical_date, logical_int64, logical_interval_mdn, logical_string, NativeType, + NativeType, logical_date, logical_int64, logical_interval_mdn, logical_string, }, - ScalarValue, -}; -use datafusion_common::{ - exec_datafusion_err, exec_err, not_impl_datafusion_err, utils::take_function_args, - Result, }; use datafusion_expr::{ Coercion, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature, diff --git a/datafusion/functions-nested/src/remove.rs b/datafusion/functions-nested/src/remove.rs index 46111b0c2d12..6cb4e2841568 100644 --- a/datafusion/functions-nested/src/remove.rs +++ b/datafusion/functions-nested/src/remove.rs @@ -20,14 +20,14 @@ use crate::utils; use crate::utils::make_scalar_function; use arrow::array::{ - cast::AsArray, new_empty_array, Array, ArrayRef, BooleanArray, GenericListArray, - OffsetSizeTrait, + Array, ArrayRef, BooleanArray, GenericListArray, OffsetSizeTrait, cast::AsArray, + new_empty_array, }; use arrow::buffer::OffsetBuffer; use arrow::datatypes::{DataType, Field}; use datafusion_common::cast::as_int64_array; use datafusion_common::utils::ListCoercion; -use datafusion_common::{exec_err, utils::take_function_args, Result}; +use datafusion_common::{Result, exec_err, utils::take_function_args}; use datafusion_expr::{ ArrayFunctionArgument, ArrayFunctionSignature, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature, Volatility, diff --git a/datafusion/functions-nested/src/repeat.rs b/datafusion/functions-nested/src/repeat.rs index d978081e490c..a121b5f03162 100644 --- a/datafusion/functions-nested/src/repeat.rs +++ b/datafusion/functions-nested/src/repeat.rs @@ -19,8 +19,8 @@ use crate::utils::make_scalar_function; use arrow::array::{ - new_null_array, Array, ArrayRef, Capacities, GenericListArray, ListArray, - MutableArrayData, OffsetSizeTrait, UInt64Array, + Array, ArrayRef, Capacities, GenericListArray, ListArray, MutableArrayData, + OffsetSizeTrait, UInt64Array, new_null_array, }; use arrow::buffer::OffsetBuffer; use arrow::compute; @@ -31,7 +31,7 @@ use arrow::datatypes::{ Field, }; use datafusion_common::cast::{as_large_list_array, as_list_array, as_uint64_array}; -use datafusion_common::{exec_err, utils::take_function_args, Result}; +use datafusion_common::{Result, exec_err, utils::take_function_args}; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; diff --git a/datafusion/functions-nested/src/replace.rs b/datafusion/functions-nested/src/replace.rs index 53182b58988f..03524924e3c3 100644 --- a/datafusion/functions-nested/src/replace.rs +++ b/datafusion/functions-nested/src/replace.rs @@ -18,15 +18,15 @@ //! [`ScalarUDFImpl`] definitions for array_replace, array_replace_n and array_replace_all functions. use arrow::array::{ - new_null_array, Array, ArrayRef, AsArray, Capacities, GenericListArray, - MutableArrayData, NullBufferBuilder, OffsetSizeTrait, + Array, ArrayRef, AsArray, Capacities, GenericListArray, MutableArrayData, + NullBufferBuilder, OffsetSizeTrait, new_null_array, }; use arrow::datatypes::{DataType, Field}; use arrow::buffer::OffsetBuffer; use datafusion_common::cast::as_int64_array; use datafusion_common::utils::ListCoercion; -use datafusion_common::{exec_err, utils::take_function_args, Result}; +use datafusion_common::{Result, exec_err, utils::take_function_args}; use datafusion_expr::{ ArrayFunctionArgument, ArrayFunctionSignature, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature, Volatility, diff --git a/datafusion/functions-nested/src/resize.rs b/datafusion/functions-nested/src/resize.rs index c76f7970d206..486163df9754 100644 --- a/datafusion/functions-nested/src/resize.rs +++ b/datafusion/functions-nested/src/resize.rs @@ -19,8 +19,8 @@ use crate::utils::make_scalar_function; use arrow::array::{ - new_null_array, Array, ArrayRef, Capacities, GenericListArray, Int64Array, - MutableArrayData, NullBufferBuilder, OffsetSizeTrait, + Array, ArrayRef, Capacities, GenericListArray, Int64Array, MutableArrayData, + NullBufferBuilder, OffsetSizeTrait, new_null_array, }; use arrow::buffer::OffsetBuffer; use arrow::datatypes::DataType; @@ -31,7 +31,7 @@ use arrow::datatypes::{ }; use datafusion_common::cast::{as_int64_array, as_large_list_array, as_list_array}; use datafusion_common::utils::ListCoercion; -use datafusion_common::{exec_err, internal_datafusion_err, Result, ScalarValue}; +use datafusion_common::{Result, ScalarValue, exec_err, internal_datafusion_err}; use datafusion_expr::{ ArrayFunctionArgument, ArrayFunctionSignature, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature, Volatility, @@ -168,7 +168,7 @@ fn array_resize_inner(arg: &[ArrayRef]) -> Result { return exec_err!( "array_resize does not support type '{:?}'.", array.data_type() - ) + ); } }; return Ok(new_null_array(&return_type, array.len())); diff --git a/datafusion/functions-nested/src/reverse.rs b/datafusion/functions-nested/src/reverse.rs index df873ade798d..114f9a0056ab 100644 --- a/datafusion/functions-nested/src/reverse.rs +++ b/datafusion/functions-nested/src/reverse.rs @@ -32,7 +32,7 @@ use datafusion_common::cast::{ as_fixed_size_list_array, as_large_list_array, as_large_list_view_array, as_list_array, as_list_view_array, }; -use datafusion_common::{exec_err, utils::take_function_args, Result}; +use datafusion_common::{Result, exec_err, utils::take_function_args}; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; diff --git a/datafusion/functions-nested/src/set_ops.rs b/datafusion/functions-nested/src/set_ops.rs index 4350bfdc5a9b..a8dd857947b1 100644 --- a/datafusion/functions-nested/src/set_ops.rs +++ b/datafusion/functions-nested/src/set_ops.rs @@ -19,8 +19,8 @@ use crate::utils::make_scalar_function; use arrow::array::{ - new_null_array, Array, ArrayRef, GenericListArray, LargeListArray, ListArray, - OffsetSizeTrait, + Array, ArrayRef, GenericListArray, LargeListArray, ListArray, OffsetSizeTrait, + new_null_array, }; use arrow::buffer::OffsetBuffer; use arrow::compute; @@ -30,7 +30,7 @@ use arrow::row::{RowConverter, SortField}; use datafusion_common::cast::{as_large_list_array, as_list_array}; use datafusion_common::utils::ListCoercion; use datafusion_common::{ - assert_eq_or_internal_err, exec_err, internal_err, utils::take_function_args, Result, + Result, assert_eq_or_internal_err, exec_err, internal_err, utils::take_function_args, }; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, @@ -538,7 +538,7 @@ fn general_array_distinct( let array = match arrays.first() { Some(array) => Arc::clone(array), None => { - return internal_err!("array_distinct: failed to get array from rows") + return internal_err!("array_distinct: failed to get array from rows"); } }; new_arrays.push(array); @@ -567,14 +567,14 @@ mod tests { buffer::OffsetBuffer, datatypes::{DataType, Field}, }; - use datafusion_common::{config::ConfigOptions, DataFusionError}; + use datafusion_common::{DataFusionError, config::ConfigOptions}; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use crate::set_ops::array_distinct_udf; #[test] - fn test_array_distinct_inner_nullability_result_type_match_return_type( - ) -> Result<(), DataFusionError> { + fn test_array_distinct_inner_nullability_result_type_match_return_type() + -> Result<(), DataFusionError> { let udf = array_distinct_udf(); for inner_nullable in [true, false] { diff --git a/datafusion/functions-nested/src/sort.rs b/datafusion/functions-nested/src/sort.rs index 8cfc8a297b7b..ba2da0f760ee 100644 --- a/datafusion/functions-nested/src/sort.rs +++ b/datafusion/functions-nested/src/sort.rs @@ -19,7 +19,7 @@ use crate::utils::make_scalar_function; use arrow::array::{ - new_null_array, Array, ArrayRef, GenericListArray, NullBufferBuilder, OffsetSizeTrait, + Array, ArrayRef, GenericListArray, NullBufferBuilder, OffsetSizeTrait, new_null_array, }; use arrow::buffer::OffsetBuffer; use arrow::compute::SortColumn; @@ -27,7 +27,7 @@ use arrow::datatypes::{DataType, FieldRef}; use arrow::{compute, compute::SortOptions}; use datafusion_common::cast::{as_large_list_array, as_list_array, as_string_array}; use datafusion_common::utils::ListCoercion; -use datafusion_common::{exec_err, plan_err, Result}; +use datafusion_common::{Result, exec_err, plan_err}; use datafusion_expr::{ ArrayFunctionArgument, ArrayFunctionSignature, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature, Volatility, diff --git a/datafusion/functions-nested/src/string.rs b/datafusion/functions-nested/src/string.rs index e19025cf673e..1c8d58fca80d 100644 --- a/datafusion/functions-nested/src/string.rs +++ b/datafusion/functions-nested/src/string.rs @@ -19,22 +19,22 @@ use arrow::array::{ Array, ArrayRef, BooleanArray, Float32Array, Float64Array, GenericListArray, - Int16Array, Int32Array, Int64Array, Int8Array, LargeStringArray, ListBuilder, - OffsetSizeTrait, StringArray, StringBuilder, UInt16Array, UInt32Array, UInt64Array, - UInt8Array, + Int8Array, Int16Array, Int32Array, Int64Array, LargeStringArray, ListBuilder, + OffsetSizeTrait, StringArray, StringBuilder, UInt8Array, UInt16Array, UInt32Array, + UInt64Array, }; use arrow::datatypes::{DataType, Field}; use datafusion_common::utils::ListCoercion; -use datafusion_common::{not_impl_err, DataFusionError, Result}; +use datafusion_common::{DataFusionError, Result, not_impl_err}; use std::any::Any; use crate::utils::make_scalar_function; use arrow::array::{ + GenericStringArray, StringArrayType, StringViewArray, builder::{ArrayBuilder, LargeStringBuilder, StringViewBuilder}, cast::AsArray, - GenericStringArray, StringArrayType, StringViewArray, }; use arrow::compute::cast; use arrow::datatypes::DataType::{ @@ -340,7 +340,11 @@ fn array_to_string_inner(args: &[ArrayRef]) -> Result { Utf8 => args[1].as_string::().iter().collect(), Utf8View => args[1].as_string_view().iter().collect(), LargeUtf8 => args[1].as_string::().iter().collect(), - other => return exec_err!("unsupported type for second argument to array_to_string function as {other:?}") + other => { + return exec_err!( + "unsupported type for second argument to array_to_string function as {other:?}" + ); + } }; let mut null_string = String::from(""); @@ -350,7 +354,11 @@ fn array_to_string_inner(args: &[ArrayRef]) -> Result { Utf8 => args[2].as_string::().value(0).to_string(), Utf8View => args[2].as_string_view().value(0).to_string(), LargeUtf8 => args[2].as_string::().value(0).to_string(), - other => return exec_err!("unsupported type for second argument to array_to_string function as {other:?}") + other => { + return exec_err!( + "unsupported type for second argument to array_to_string function as {other:?}" + ); + } }; with_null_string = true; } @@ -527,20 +535,40 @@ fn string_to_array_inner(args: &[ArrayRef]) -> Result { let string_array = args[0].as_string::(); - let builder = StringBuilder::with_capacity(string_array.len(), string_array.get_buffer_memory_size()); - string_to_array_inner_2::<&GenericStringArray, StringBuilder>(args, &string_array, builder) + let builder = StringBuilder::with_capacity( + string_array.len(), + string_array.get_buffer_memory_size(), + ); + string_to_array_inner_2::<&GenericStringArray, StringBuilder>( + args, + &string_array, + builder, + ) } Utf8View => { let string_array = args[0].as_string_view(); let builder = StringViewBuilder::with_capacity(string_array.len()); - string_to_array_inner_2::<&StringViewArray, StringViewBuilder>(args, &string_array, builder) + string_to_array_inner_2::<&StringViewArray, StringViewBuilder>( + args, + &string_array, + builder, + ) } LargeUtf8 => { let string_array = args[0].as_string::(); - let builder = LargeStringBuilder::with_capacity(string_array.len(), string_array.get_buffer_memory_size()); - string_to_array_inner_2::<&GenericStringArray, LargeStringBuilder>(args, &string_array, builder) + let builder = LargeStringBuilder::with_capacity( + string_array.len(), + string_array.get_buffer_memory_size(), + ); + string_to_array_inner_2::<&GenericStringArray, LargeStringBuilder>( + args, + &string_array, + builder, + ) } - other => exec_err!("unsupported type for first argument to string_to_array function as {other:?}") + other => exec_err!( + "unsupported type for first argument to string_to_array function as {other:?}" + ), } } @@ -564,9 +592,11 @@ where StringBuilderType, >(string_array, &delimiter_array, None, string_builder) } else { - string_to_array_inner_3::, - StringBuilderType>(args, string_array, &delimiter_array, string_builder) + StringBuilderType, + >(args, string_array, &delimiter_array, string_builder) } } Utf8View => { @@ -580,9 +610,11 @@ where StringBuilderType, >(string_array, &delimiter_array, None, string_builder) } else { - string_to_array_inner_3::(args, string_array, &delimiter_array, string_builder) + StringBuilderType, + >(args, string_array, &delimiter_array, string_builder) } } LargeUtf8 => { @@ -595,12 +627,16 @@ where StringBuilderType, >(string_array, &delimiter_array, None, string_builder) } else { - string_to_array_inner_3::, - StringBuilderType>(args, string_array, &delimiter_array, string_builder) + StringBuilderType, + >(args, string_array, &delimiter_array, string_builder) } } - other => exec_err!("unsupported type for second argument to string_to_array function as {other:?}") + other => exec_err!( + "unsupported type for second argument to string_to_array function as {other:?}" + ), } } diff --git a/datafusion/functions-nested/src/utils.rs b/datafusion/functions-nested/src/utils.rs index 464301b6ffcf..d2a69c010e8e 100644 --- a/datafusion/functions-nested/src/utils.rs +++ b/datafusion/functions-nested/src/utils.rs @@ -28,7 +28,7 @@ use arrow::buffer::OffsetBuffer; use datafusion_common::cast::{ as_fixed_size_list_array, as_large_list_array, as_list_array, }; -use datafusion_common::{exec_err, internal_err, plan_err, Result, ScalarValue}; +use datafusion_common::{Result, ScalarValue, exec_err, internal_err, plan_err}; use datafusion_expr::ColumnarValue; use itertools::Itertools as _; diff --git a/datafusion/optimizer/Cargo.toml b/datafusion/optimizer/Cargo.toml index 15d3261ca513..0fb08684cd14 100644 --- a/datafusion/optimizer/Cargo.toml +++ b/datafusion/optimizer/Cargo.toml @@ -21,7 +21,7 @@ description = "DataFusion Query Optimizer" keywords = ["datafusion", "query", "optimizer"] readme = "README.md" version = { workspace = true } -edition = { workspace = true } +edition = "2024" homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } diff --git a/datafusion/optimizer/benches/projection_unnecessary.rs b/datafusion/optimizer/benches/projection_unnecessary.rs index bdc59de4820b..2082ed6a3751 100644 --- a/datafusion/optimizer/benches/projection_unnecessary.rs +++ b/datafusion/optimizer/benches/projection_unnecessary.rs @@ -16,10 +16,10 @@ // under the License. use arrow::datatypes::{DataType, Field, Schema}; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{Criterion, criterion_group, criterion_main}; use datafusion_common::ToDFSchema; use datafusion_common::{Column, TableReference}; -use datafusion_expr::{logical_plan::LogicalPlan, projection_schema, Expr}; +use datafusion_expr::{Expr, logical_plan::LogicalPlan, projection_schema}; use datafusion_optimizer::optimize_projections::is_projection_unnecessary; use std::hint::black_box; use std::sync::Arc; diff --git a/datafusion/optimizer/src/analyzer/function_rewrite.rs b/datafusion/optimizer/src/analyzer/function_rewrite.rs index c6bf14ebce2e..9faa60d939fe 100644 --- a/datafusion/optimizer/src/analyzer/function_rewrite.rs +++ b/datafusion/optimizer/src/analyzer/function_rewrite.rs @@ -23,9 +23,9 @@ use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::{DFSchema, Result}; use crate::utils::NamePreserver; +use datafusion_expr::LogicalPlan; use datafusion_expr::expr_rewriter::FunctionRewrite; use datafusion_expr::utils::merge_schema; -use datafusion_expr::LogicalPlan; use std::sync::Arc; /// Analyzer rule that invokes [`FunctionRewrite`]s on expressions diff --git a/datafusion/optimizer/src/analyzer/mod.rs b/datafusion/optimizer/src/analyzer/mod.rs index 272692f98368..ddb3b828f01d 100644 --- a/datafusion/optimizer/src/analyzer/mod.rs +++ b/datafusion/optimizer/src/analyzer/mod.rs @@ -22,9 +22,9 @@ use std::sync::Arc; use log::debug; +use datafusion_common::Result; use datafusion_common::config::ConfigOptions; use datafusion_common::instant::Instant; -use datafusion_common::Result; use datafusion_expr::expr_rewriter::FunctionRewrite; use datafusion_expr::{InvariantLevel, LogicalPlan}; diff --git a/datafusion/optimizer/src/analyzer/resolve_grouping_function.rs b/datafusion/optimizer/src/analyzer/resolve_grouping_function.rs index 6381db63122d..747c54e2cd26 100644 --- a/datafusion/optimizer/src/analyzer/resolve_grouping_function.rs +++ b/datafusion/optimizer/src/analyzer/resolve_grouping_function.rs @@ -28,14 +28,14 @@ use arrow::datatypes::DataType; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::{ - internal_datafusion_err, plan_err, Column, DFSchema, Result, ScalarValue, + Column, DFSchema, Result, ScalarValue, internal_datafusion_err, plan_err, }; use datafusion_expr::expr::{AggregateFunction, Alias}; use datafusion_expr::logical_plan::LogicalPlan; use datafusion_expr::utils::grouping_set_to_exprlist; use datafusion_expr::{ - bitwise_and, bitwise_or, bitwise_shift_left, bitwise_shift_right, cast, Aggregate, - Expr, Projection, + Aggregate, Expr, Projection, bitwise_and, bitwise_or, bitwise_shift_left, + bitwise_shift_right, cast, }; use itertools::Itertools; @@ -150,7 +150,7 @@ fn analyze_internal(plan: LogicalPlan) -> Result> { fn is_grouping_function(expr: &Expr) -> bool { // TODO: Do something better than name here should grouping be a built // in expression? - matches!(expr, Expr::AggregateFunction(AggregateFunction { ref func, .. }) if func.name() == "grouping") + matches!(expr, Expr::AggregateFunction(AggregateFunction { func, .. }) if func.name() == "grouping") } fn contains_grouping_function(exprs: &[Expr]) -> bool { diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 85751fd70c05..bc317e9c201c 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -20,7 +20,7 @@ use std::sync::Arc; use datafusion_expr::binary::BinaryTypeCoercer; -use itertools::{izip, Itertools as _}; +use itertools::{Itertools as _, izip}; use arrow::datatypes::{DataType, Field, IntervalUnit, Schema}; @@ -29,9 +29,9 @@ use crate::utils::NamePreserver; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter}; use datafusion_common::{ + Column, DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, TableReference, exec_err, internal_datafusion_err, internal_err, not_impl_err, plan_datafusion_err, - plan_err, Column, DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, - TableReference, + plan_err, }; use datafusion_expr::expr::{ self, AggregateFunctionParams, Alias, Between, BinaryExpr, Case, Exists, InList, @@ -50,9 +50,9 @@ use datafusion_expr::type_coercion::other::{ use datafusion_expr::type_coercion::{is_datetime, is_utf8_or_utf8view_or_large_utf8}; use datafusion_expr::utils::merge_schema; use datafusion_expr::{ - is_false, is_not_false, is_not_true, is_not_unknown, is_true, is_unknown, not, AggregateUDF, Expr, ExprSchemable, Join, Limit, LogicalPlan, Operator, Projection, - ScalarUDF, Union, WindowFrame, WindowFrameBound, WindowFrameUnits, + ScalarUDF, Union, WindowFrame, WindowFrameBound, WindowFrameUnits, is_false, + is_not_false, is_not_true, is_not_unknown, is_true, is_unknown, not, }; /// Performs type coercion by determining the schema @@ -480,7 +480,8 @@ impl TreeNodeRewriter for TypeCoercionRewriter<'_> { get_coerce_type_for_list(&expr_data_type, &list_data_types); match result_type { None => plan_err!( - "Can not find compatible types to compare {expr_data_type} with [{}]", list_data_types.iter().join(", ") + "Can not find compatible types to compare {expr_data_type} with [{}]", + list_data_types.iter().join(", ") ), Some(coerced_type) => { // find the coerced type @@ -491,9 +492,9 @@ impl TreeNodeRewriter for TypeCoercionRewriter<'_> { list_expr.cast_to(&coerced_type, self.schema) }) .collect::>>()?; - Ok(Transformed::yes(Expr::InList(InList ::new( - Box::new(cast_expr), - cast_list_expr, + Ok(Transformed::yes(Expr::InList(InList::new( + Box::new(cast_expr), + cast_list_expr, negated, )))) } @@ -1119,10 +1120,10 @@ mod test { use arrow::datatypes::{DataType, Field, Schema, SchemaBuilder, TimeUnit}; use insta::assert_snapshot; + use crate::analyzer::Analyzer; use crate::analyzer::type_coercion::{ - coerce_case_expression, TypeCoercion, TypeCoercionRewriter, + TypeCoercion, TypeCoercionRewriter, coerce_case_expression, }; - use crate::analyzer::Analyzer; use crate::assert_analyzed_plan_with_config_eq_snapshot; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{TransformedResult, TreeNode}; @@ -1131,10 +1132,10 @@ mod test { use datafusion_expr::logical_plan::{EmptyRelation, Projection, Sort}; use datafusion_expr::test::function_stub::avg_udaf; use datafusion_expr::{ - cast, col, create_udaf, is_true, lit, AccumulatorFactoryFunction, AggregateUDF, - BinaryExpr, Case, ColumnarValue, Expr, ExprSchemable, Filter, LogicalPlan, - Operator, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, - SimpleAggregateUDF, Subquery, Union, Volatility, + AccumulatorFactoryFunction, AggregateUDF, BinaryExpr, Case, ColumnarValue, Expr, + ExprSchemable, Filter, LogicalPlan, Operator, ScalarFunctionArgs, ScalarUDF, + ScalarUDFImpl, Signature, SimpleAggregateUDF, Subquery, Union, Volatility, cast, + col, create_udaf, is_true, lit, }; use datafusion_functions_aggregate::average::AvgAccumulator; use datafusion_sql::TableReference; @@ -1882,7 +1883,7 @@ mod test { let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty)?); assert_type_coercion_error( plan, - "Cannot infer common argument type for comparison operation Int64 IS DISTINCT FROM Boolean" + "Cannot infer common argument type for comparison operation Int64 IS DISTINCT FROM Boolean", )?; // is not true @@ -2028,7 +2029,7 @@ mod test { let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty)?); assert_type_coercion_error( plan, - "Cannot infer common argument type for comparison operation Utf8 IS DISTINCT FROM Boolean" + "Cannot infer common argument type for comparison operation Utf8 IS DISTINCT FROM Boolean", )?; // is not unknown diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 251006849459..d9273a8f60fb 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -27,14 +27,14 @@ use crate::optimizer::ApplyOrder; use crate::utils::NamePreserver; use datafusion_common::alias::AliasGenerator; -use datafusion_common::cse::{CSEController, FoundCommonNodes, CSE}; +use datafusion_common::cse::{CSE, CSEController, FoundCommonNodes}; use datafusion_common::tree_node::{Transformed, TreeNode}; -use datafusion_common::{qualified_name, Column, DFSchema, DFSchemaRef, Result}; +use datafusion_common::{Column, DFSchema, DFSchemaRef, Result, qualified_name}; use datafusion_expr::expr::{Alias, ScalarFunction}; use datafusion_expr::logical_plan::{ Aggregate, Filter, LogicalPlan, Projection, Sort, Window, }; -use datafusion_expr::{col, BinaryExpr, Case, Expr, Operator, SortExpr}; +use datafusion_expr::{BinaryExpr, Case, Expr, Operator, SortExpr, col}; const CSE_PREFIX: &str = "__common_expr"; @@ -814,11 +814,11 @@ mod test { use std::iter; use arrow::datatypes::{DataType, Field, Schema}; - use datafusion_expr::logical_plan::{table_scan, JoinType}; + use datafusion_expr::logical_plan::{JoinType, table_scan}; use datafusion_expr::{ - grouping_set, is_null, not, AccumulatorFactoryFunction, AggregateUDF, - ColumnarValue, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, - SimpleAggregateUDF, Volatility, + AccumulatorFactoryFunction, AggregateUDF, ColumnarValue, ScalarFunctionArgs, + ScalarUDF, ScalarUDFImpl, Signature, SimpleAggregateUDF, Volatility, + grouping_set, is_null, not, }; use datafusion_expr::{lit, logical_plan::builder::LogicalPlanBuilder}; diff --git a/datafusion/optimizer/src/decorrelate.rs b/datafusion/optimizer/src/decorrelate.rs index 63236787743a..e8a9c8c83ae9 100644 --- a/datafusion/optimizer/src/decorrelate.rs +++ b/datafusion/optimizer/src/decorrelate.rs @@ -26,15 +26,15 @@ use crate::simplify_expressions::ExprSimplifier; use datafusion_common::tree_node::{ Transformed, TransformedResult, TreeNode, TreeNodeRecursion, TreeNodeRewriter, }; -use datafusion_common::{plan_err, Column, DFSchemaRef, HashMap, Result, ScalarValue}; +use datafusion_common::{Column, DFSchemaRef, HashMap, Result, ScalarValue, plan_err}; use datafusion_expr::expr::Alias; use datafusion_expr::simplify::SimplifyContext; use datafusion_expr::utils::{ collect_subquery_cols, conjunction, find_join_exprs, split_conjunction, }; use datafusion_expr::{ - expr, lit, BinaryExpr, Cast, EmptyRelation, Expr, FetchType, LogicalPlan, - LogicalPlanBuilder, Operator, + BinaryExpr, Cast, EmptyRelation, Expr, FetchType, LogicalPlan, LogicalPlanBuilder, + Operator, expr, lit, }; use datafusion_physical_expr::execution_props::ExecutionProps; diff --git a/datafusion/optimizer/src/decorrelate_lateral_join.rs b/datafusion/optimizer/src/decorrelate_lateral_join.rs index 7d2072ad1ce9..4d3838c78e5e 100644 --- a/datafusion/optimizer/src/decorrelate_lateral_join.rs +++ b/datafusion/optimizer/src/decorrelate_lateral_join.rs @@ -22,12 +22,12 @@ use std::collections::BTreeSet; use crate::decorrelate::PullUpCorrelatedExpr; use crate::optimizer::ApplyOrder; use crate::{OptimizerConfig, OptimizerRule}; -use datafusion_expr::{lit, Join}; +use datafusion_expr::{Join, lit}; +use datafusion_common::Result; use datafusion_common::tree_node::{ Transformed, TransformedResult, TreeNode, TreeNodeRecursion, }; -use datafusion_common::Result; use datafusion_expr::logical_plan::JoinType; use datafusion_expr::utils::conjunction; use datafusion_expr::{LogicalPlan, LogicalPlanBuilder}; diff --git a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs index 0590aba52bfa..e705b909dc51 100644 --- a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs +++ b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs @@ -27,14 +27,14 @@ use crate::{OptimizerConfig, OptimizerRule}; use datafusion_common::alias::AliasGenerator; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_common::{assert_or_internal_err, plan_err, Column, Result}; +use datafusion_common::{Column, Result, assert_or_internal_err, plan_err}; use datafusion_expr::expr::{Exists, InSubquery}; use datafusion_expr::expr_rewriter::create_col_from_scalar_expr; use datafusion_expr::logical_plan::{JoinType, Subquery}; use datafusion_expr::utils::{conjunction, expr_to_columns, split_conjunction_owned}; use datafusion_expr::{ - exists, in_subquery, lit, not, not_exists, not_in_subquery, BinaryExpr, Expr, Filter, - LogicalPlan, LogicalPlanBuilder, Operator, + BinaryExpr, Expr, Filter, LogicalPlan, LogicalPlanBuilder, Operator, exists, + in_subquery, lit, not, not_exists, not_in_subquery, }; use log::debug; @@ -364,8 +364,8 @@ fn build_join( })), ) => { let right_col = create_col_from_scalar_expr(right.deref(), alias)?; - let in_predicate = Expr::eq(left.deref().clone(), Expr::Column(right_col)); - in_predicate + + Expr::eq(left.deref().clone(), Expr::Column(right_col)) } (None, None) => lit(true), _ => return Ok(None), diff --git a/datafusion/optimizer/src/eliminate_cross_join.rs b/datafusion/optimizer/src/eliminate_cross_join.rs index ae1d7df46d52..26d767a5421f 100644 --- a/datafusion/optimizer/src/eliminate_cross_join.rs +++ b/datafusion/optimizer/src/eliminate_cross_join.rs @@ -27,7 +27,7 @@ use datafusion_expr::logical_plan::{ Filter, Join, JoinConstraint, JoinType, LogicalPlan, Projection, }; use datafusion_expr::utils::{can_hash, find_valid_equijoin_key_pair}; -use datafusion_expr::{and, build_join_schema, ExprSchemable, Operator}; +use datafusion_expr::{ExprSchemable, Operator, and, build_join_schema}; #[derive(Default, Debug)] pub struct EliminateCrossJoin; @@ -276,10 +276,9 @@ fn can_flatten_join_inputs(plan: &LogicalPlan) -> bool { join_type: JoinType::Inner, .. }) = child + && !can_flatten_join_inputs(child) { - if !can_flatten_join_inputs(child) { - return false; - } + return false; } } true @@ -316,10 +315,10 @@ fn find_inner_join( )?; // Save join keys - if let Some((valid_l, valid_r)) = key_pair { - if can_hash(&valid_l.get_type(left_input.schema())?) { - join_keys.push((valid_l, valid_r)); - } + if let Some((valid_l, valid_r)) = key_pair + && can_hash(&valid_l.get_type(left_input.schema())?) + { + join_keys.push((valid_l, valid_r)); } } @@ -449,9 +448,9 @@ mod tests { use crate::test::*; use datafusion_expr::{ + Operator::{And, Or}, binary_expr, col, lit, logical_plan::builder::LogicalPlanBuilder, - Operator::{And, Or}, }; use insta::assert_snapshot; diff --git a/datafusion/optimizer/src/eliminate_duplicated_expr.rs b/datafusion/optimizer/src/eliminate_duplicated_expr.rs index a6651df938a7..0bdfb4522433 100644 --- a/datafusion/optimizer/src/eliminate_duplicated_expr.rs +++ b/datafusion/optimizer/src/eliminate_duplicated_expr.rs @@ -19,8 +19,8 @@ use crate::optimizer::ApplyOrder; use crate::{OptimizerConfig, OptimizerRule}; -use datafusion_common::tree_node::Transformed; use datafusion_common::Result; +use datafusion_common::tree_node::Transformed; use datafusion_expr::logical_plan::LogicalPlan; use datafusion_expr::{Aggregate, Expr, Sort, SortExpr}; use std::hash::{Hash, Hasher}; @@ -118,9 +118,9 @@ impl OptimizerRule for EliminateDuplicatedExpr { #[cfg(test)] mod tests { use super::*; + use crate::OptimizerContext; use crate::assert_optimized_plan_eq_snapshot; use crate::test::*; - use crate::OptimizerContext; use datafusion_expr::{col, logical_plan::builder::LogicalPlanBuilder}; use std::sync::Arc; diff --git a/datafusion/optimizer/src/eliminate_filter.rs b/datafusion/optimizer/src/eliminate_filter.rs index 1b763d6f8957..e150406d6ff5 100644 --- a/datafusion/optimizer/src/eliminate_filter.rs +++ b/datafusion/optimizer/src/eliminate_filter.rs @@ -81,10 +81,10 @@ impl OptimizerRule for EliminateFilter { mod tests { use std::sync::Arc; - use crate::assert_optimized_plan_eq_snapshot; use crate::OptimizerContext; + use crate::assert_optimized_plan_eq_snapshot; use datafusion_common::{Result, ScalarValue}; - use datafusion_expr::{col, lit, logical_plan::builder::LogicalPlanBuilder, Expr}; + use datafusion_expr::{Expr, col, lit, logical_plan::builder::LogicalPlanBuilder}; use crate::eliminate_filter::EliminateFilter; use crate::test::*; diff --git a/datafusion/optimizer/src/eliminate_group_by_constant.rs b/datafusion/optimizer/src/eliminate_group_by_constant.rs index 4e16fc0aa159..e93edc62403a 100644 --- a/datafusion/optimizer/src/eliminate_group_by_constant.rs +++ b/datafusion/optimizer/src/eliminate_group_by_constant.rs @@ -19,8 +19,8 @@ use crate::optimizer::ApplyOrder; use crate::{OptimizerConfig, OptimizerRule}; -use datafusion_common::tree_node::Transformed; use datafusion_common::Result; +use datafusion_common::tree_node::Transformed; use datafusion_expr::{Aggregate, Expr, LogicalPlan, LogicalPlanBuilder, Volatility}; /// Optimizer rule that removes constant expressions from `GROUP BY` clause @@ -115,16 +115,16 @@ fn is_constant_expression(expr: &Expr) -> bool { #[cfg(test)] mod tests { use super::*; + use crate::OptimizerContext; use crate::assert_optimized_plan_eq_snapshot; use crate::test::*; - use crate::OptimizerContext; use arrow::datatypes::DataType; use datafusion_common::Result; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::{ - col, lit, ColumnarValue, LogicalPlanBuilder, ScalarFunctionArgs, ScalarUDF, - ScalarUDFImpl, Signature, TypeSignature, + ColumnarValue, LogicalPlanBuilder, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, + Signature, TypeSignature, col, lit, }; use datafusion_functions_aggregate::expr_fn::count; diff --git a/datafusion/optimizer/src/eliminate_join.rs b/datafusion/optimizer/src/eliminate_join.rs index 412bbea2ae92..885910c1e418 100644 --- a/datafusion/optimizer/src/eliminate_join.rs +++ b/datafusion/optimizer/src/eliminate_join.rs @@ -22,8 +22,8 @@ use datafusion_common::tree_node::Transformed; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::JoinType::Inner; use datafusion_expr::{ - logical_plan::{EmptyRelation, LogicalPlan}, Expr, + logical_plan::{EmptyRelation, LogicalPlan}, }; /// Eliminates joins when join condition is false. @@ -74,9 +74,9 @@ impl OptimizerRule for EliminateJoin { #[cfg(test)] mod tests { + use crate::OptimizerContext; use crate::assert_optimized_plan_eq_snapshot; use crate::eliminate_join::EliminateJoin; - use crate::OptimizerContext; use datafusion_common::Result; use datafusion_expr::JoinType::Inner; use datafusion_expr::{lit, logical_plan::builder::LogicalPlanBuilder}; diff --git a/datafusion/optimizer/src/eliminate_limit.rs b/datafusion/optimizer/src/eliminate_limit.rs index 8e25d3246f6c..4309de1822da 100644 --- a/datafusion/optimizer/src/eliminate_limit.rs +++ b/datafusion/optimizer/src/eliminate_limit.rs @@ -18,8 +18,8 @@ //! [`EliminateLimit`] eliminates `LIMIT` when possible use crate::optimizer::ApplyOrder; use crate::{OptimizerConfig, OptimizerRule}; -use datafusion_common::tree_node::Transformed; use datafusion_common::Result; +use datafusion_common::tree_node::Transformed; use datafusion_expr::logical_plan::{EmptyRelation, FetchType, LogicalPlan, SkipType}; use std::sync::Arc; @@ -90,12 +90,12 @@ impl OptimizerRule for EliminateLimit { #[cfg(test)] mod tests { use super::*; - use crate::test::*; use crate::OptimizerContext; + use crate::test::*; use datafusion_common::Column; use datafusion_expr::{ col, - logical_plan::{builder::LogicalPlanBuilder, JoinType}, + logical_plan::{JoinType, builder::LogicalPlanBuilder}, }; use std::sync::Arc; diff --git a/datafusion/optimizer/src/eliminate_outer_join.rs b/datafusion/optimizer/src/eliminate_outer_join.rs index 45877642f276..fcc21ca415b0 100644 --- a/datafusion/optimizer/src/eliminate_outer_join.rs +++ b/datafusion/optimizer/src/eliminate_outer_join.rs @@ -304,15 +304,15 @@ fn extract_non_nullable_columns( #[cfg(test)] mod tests { use super::*; + use crate::OptimizerContext; use crate::assert_optimized_plan_eq_snapshot; use crate::test::*; - use crate::OptimizerContext; use arrow::datatypes::DataType; use datafusion_expr::{ + Operator::{And, Or}, binary_expr, cast, col, lit, logical_plan::builder::LogicalPlanBuilder, try_cast, - Operator::{And, Or}, }; macro_rules! assert_optimized_plan_equal { diff --git a/datafusion/optimizer/src/extract_equijoin_predicate.rs b/datafusion/optimizer/src/extract_equijoin_predicate.rs index 9228e84abf93..276af87b2dcb 100644 --- a/datafusion/optimizer/src/extract_equijoin_predicate.rs +++ b/datafusion/optimizer/src/extract_equijoin_predicate.rs @@ -19,7 +19,7 @@ use crate::optimizer::ApplyOrder; use crate::{OptimizerConfig, OptimizerRule}; use datafusion_common::tree_node::Transformed; -use datafusion_common::{assert_or_internal_err, DFSchema}; +use datafusion_common::{DFSchema, assert_or_internal_err}; use datafusion_common::{NullEquality, Result}; use datafusion_expr::utils::split_conjunction_owned; use datafusion_expr::utils::{can_hash, find_valid_equijoin_key_pair}; @@ -273,7 +273,7 @@ mod tests { use crate::test::*; use arrow::datatypes::DataType; use datafusion_expr::{ - col, lit, logical_plan::builder::LogicalPlanBuilder, JoinType, + JoinType, col, lit, logical_plan::builder::LogicalPlanBuilder, }; use std::sync::Arc; diff --git a/datafusion/optimizer/src/filter_null_join_keys.rs b/datafusion/optimizer/src/filter_null_join_keys.rs index 8ad7fa53c0e3..c8f419d3e543 100644 --- a/datafusion/optimizer/src/filter_null_join_keys.rs +++ b/datafusion/optimizer/src/filter_null_join_keys.rs @@ -23,7 +23,7 @@ use crate::{OptimizerConfig, OptimizerRule}; use datafusion_common::tree_node::Transformed; use datafusion_common::{NullEquality, Result}; use datafusion_expr::utils::conjunction; -use datafusion_expr::{logical_plan::Filter, Expr, ExprSchemable, LogicalPlan}; +use datafusion_expr::{Expr, ExprSchemable, LogicalPlan, logical_plan::Filter}; use std::sync::Arc; /// The FilterNullJoinKeys rule will identify joins with equi-join conditions @@ -108,12 +108,12 @@ fn create_not_null_predicate(filters: Vec) -> Expr { #[cfg(test)] mod tests { use super::*; - use crate::assert_optimized_plan_eq_snapshot; use crate::OptimizerContext; + use crate::assert_optimized_plan_eq_snapshot; use arrow::datatypes::{DataType, Field, Schema}; use datafusion_common::Column; use datafusion_expr::logical_plan::table_scan; - use datafusion_expr::{col, lit, JoinType, LogicalPlanBuilder}; + use datafusion_expr::{JoinType, LogicalPlanBuilder, col, lit}; macro_rules! assert_optimized_plan_equal { ( diff --git a/datafusion/optimizer/src/join_key_set.rs b/datafusion/optimizer/src/join_key_set.rs index 0a97173b3096..de795c0aeacf 100644 --- a/datafusion/optimizer/src/join_key_set.rs +++ b/datafusion/optimizer/src/join_key_set.rs @@ -157,7 +157,7 @@ impl Equivalent<(Expr, Expr)> for ExprPair<'_> { #[cfg(test)] mod test { use crate::join_key_set::JoinKeySet; - use datafusion_expr::{col, Expr}; + use datafusion_expr::{Expr, col}; #[test] fn test_insert() { diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index ee7b006a2d49..81b119088c52 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -25,13 +25,13 @@ use std::collections::HashSet; use std::sync::Arc; use datafusion_common::{ - assert_eq_or_internal_err, get_required_group_by_exprs_indices, - internal_datafusion_err, internal_err, Column, DFSchema, HashMap, JoinType, Result, + Column, DFSchema, HashMap, JoinType, Result, assert_eq_or_internal_err, + get_required_group_by_exprs_indices, internal_datafusion_err, internal_err, }; use datafusion_expr::expr::Alias; use datafusion_expr::{ - logical_plan::LogicalPlan, Aggregate, Distinct, EmptyRelation, Expr, Projection, - TableScan, Unnest, Window, + Aggregate, Distinct, EmptyRelation, Expr, Projection, TableScan, Unnest, Window, + logical_plan::LogicalPlan, }; use crate::optimize_projections::required_indices::RequiredIndices; @@ -138,7 +138,7 @@ fn optimize_projections( LogicalPlan::Projection(proj) => { return merge_consecutive_projections(proj)?.transform_data(|proj| { rewrite_projection_given_requirements(proj, config, &indices) - }) + }); } LogicalPlan::Aggregate(aggregate) => { // Split parent requirements to GROUP BY and aggregate sections: @@ -882,12 +882,11 @@ pub fn is_projection_unnecessary( /// subqueries like scalar, EXISTS, or IN. These cases prevent projection /// pushdown for now because we cannot safely reason about their column usage. fn plan_contains_other_subqueries(plan: &LogicalPlan, cte_name: &str) -> bool { - if let LogicalPlan::SubqueryAlias(alias) = plan { - if alias.alias.table() != cte_name - && !subquery_alias_targets_recursive_cte(alias.input.as_ref(), cte_name) - { - return true; - } + if let LogicalPlan::SubqueryAlias(alias) = plan + && alias.alias.table() != cte_name + && !subquery_alias_targets_recursive_cte(alias.input.as_ref(), cte_name) + { + return true; } let mut found = false; @@ -957,14 +956,15 @@ mod tests { }; use datafusion_expr::ExprFunctionExt; use datafusion_expr::{ - binary_expr, build_join_schema, + BinaryExpr, Expr, Extension, Like, LogicalPlan, Operator, Projection, + UserDefinedLogicalNodeCore, WindowFunctionDefinition, binary_expr, + build_join_schema, builder::table_scan_with_filters, col, expr::{self, Cast}, lit, logical_plan::{builder::LogicalPlanBuilder, table_scan}, - not, try_cast, when, BinaryExpr, Expr, Extension, Like, LogicalPlan, Operator, - Projection, UserDefinedLogicalNodeCore, WindowFunctionDefinition, + not, try_cast, when, }; use insta::assert_snapshot; diff --git a/datafusion/optimizer/src/optimize_unions.rs b/datafusion/optimizer/src/optimize_unions.rs index 23a6fe95e579..969cf6a120c0 100644 --- a/datafusion/optimizer/src/optimize_unions.rs +++ b/datafusion/optimizer/src/optimize_unions.rs @@ -18,8 +18,8 @@ //! [`OptimizeUnions`]: removes `Union` nodes in the logical plan. use crate::optimizer::ApplyOrder; use crate::{OptimizerConfig, OptimizerRule}; -use datafusion_common::tree_node::Transformed; use datafusion_common::Result; +use datafusion_common::tree_node::Transformed; use datafusion_expr::expr_rewriter::coerce_plan_expr_for_schema; use datafusion_expr::{Distinct, LogicalPlan, Projection, Union}; use itertools::Itertools; @@ -151,10 +151,10 @@ fn extract_plan_from_distinct(plan: Arc) -> Arc { #[cfg(test)] mod tests { use super::*; - use crate::analyzer::type_coercion::TypeCoercion; + use crate::OptimizerContext; use crate::analyzer::Analyzer; + use crate::analyzer::type_coercion::TypeCoercion; use crate::assert_optimized_plan_eq_snapshot; - use crate::OptimizerContext; use arrow::datatypes::{DataType, Field, Schema}; use datafusion_common::config::ConfigOptions; use datafusion_expr::{col, logical_plan::table_scan}; diff --git a/datafusion/optimizer/src/optimizer.rs b/datafusion/optimizer/src/optimizer.rs index 421563d5e7e8..ededcec0a47c 100644 --- a/datafusion/optimizer/src/optimizer.rs +++ b/datafusion/optimizer/src/optimizer.rs @@ -22,14 +22,14 @@ use std::sync::Arc; use chrono::{DateTime, Utc}; use datafusion_expr::registry::FunctionRegistry; -use datafusion_expr::{assert_expected_schema, InvariantLevel}; +use datafusion_expr::{InvariantLevel, assert_expected_schema}; use log::{debug, warn}; use datafusion_common::alias::AliasGenerator; use datafusion_common::config::ConfigOptions; use datafusion_common::instant::Instant; use datafusion_common::tree_node::{Transformed, TreeNodeRewriter}; -use datafusion_common::{internal_err, DFSchema, DataFusionError, HashSet, Result}; +use datafusion_common::{DFSchema, DataFusionError, HashSet, Result, internal_err}; use datafusion_expr::logical_plan::LogicalPlan; use crate::common_subexpr_eliminate::CommonSubexprEliminate; @@ -288,9 +288,7 @@ impl TreeNodeRewriter for Rewriter<'_> { fn f_down(&mut self, node: LogicalPlan) -> Result> { if self.apply_order == ApplyOrder::TopDown { - { - self.rule.rewrite(node, self.config) - } + self.rule.rewrite(node, self.config) } else { Ok(Transformed::no(node)) } @@ -298,9 +296,7 @@ impl TreeNodeRewriter for Rewriter<'_> { fn f_up(&mut self, node: LogicalPlan) -> Result> { if self.apply_order == ApplyOrder::BottomUp { - { - self.rule.rewrite(node, self.config) - } + self.rule.rewrite(node, self.config) } else { Ok(Transformed::no(node)) } @@ -464,10 +460,10 @@ mod tests { use datafusion_common::tree_node::Transformed; use datafusion_common::{ - assert_contains, plan_err, DFSchema, DFSchemaRef, DataFusionError, Result, + DFSchema, DFSchemaRef, DataFusionError, Result, assert_contains, plan_err, }; use datafusion_expr::logical_plan::EmptyRelation; - use datafusion_expr::{col, lit, LogicalPlan, LogicalPlanBuilder, Projection}; + use datafusion_expr::{LogicalPlan, LogicalPlanBuilder, Projection, col, lit}; use crate::optimizer::Optimizer; use crate::test::test_table_scan; diff --git a/datafusion/optimizer/src/plan_signature.rs b/datafusion/optimizer/src/plan_signature.rs index 73e6b418272a..6f46d7b66334 100644 --- a/datafusion/optimizer/src/plan_signature.rs +++ b/datafusion/optimizer/src/plan_signature.rs @@ -89,7 +89,7 @@ mod tests { use std::sync::Arc; use datafusion_common::{DFSchema, Result}; - use datafusion_expr::{lit, LogicalPlan}; + use datafusion_expr::{LogicalPlan, lit}; use crate::plan_signature::get_node_number; diff --git a/datafusion/optimizer/src/propagate_empty_relation.rs b/datafusion/optimizer/src/propagate_empty_relation.rs index 629b13e4001d..2b02b7779461 100644 --- a/datafusion/optimizer/src/propagate_empty_relation.rs +++ b/datafusion/optimizer/src/propagate_empty_relation.rs @@ -19,9 +19,9 @@ use std::sync::Arc; -use datafusion_common::tree_node::Transformed; use datafusion_common::JoinType; -use datafusion_common::{plan_err, Result}; +use datafusion_common::tree_node::Transformed; +use datafusion_common::{Result, plan_err}; use datafusion_expr::logical_plan::LogicalPlan; use datafusion_expr::{EmptyRelation, Projection, Union}; @@ -140,10 +140,10 @@ impl OptimizerRule for PropagateEmptyRelation { } } LogicalPlan::Aggregate(ref agg) => { - if !agg.group_expr.is_empty() { - if let Some(empty_plan) = empty_child(&plan)? { - return Ok(Transformed::yes(empty_plan)); - } + if !agg.group_expr.is_empty() + && let Some(empty_plan) = empty_child(&plan)? + { + return Ok(Transformed::yes(empty_plan)); } Ok(Transformed::no(LogicalPlan::Aggregate(agg.clone()))) } @@ -239,9 +239,10 @@ mod tests { use datafusion_common::{Column, DFSchema, JoinType}; use datafusion_expr::logical_plan::table_scan; use datafusion_expr::{ - binary_expr, col, lit, logical_plan::builder::LogicalPlanBuilder, Operator, + Operator, binary_expr, col, lit, logical_plan::builder::LogicalPlanBuilder, }; + use crate::OptimizerContext; use crate::assert_optimized_plan_eq_snapshot; use crate::eliminate_filter::EliminateFilter; use crate::optimize_unions::OptimizeUnions; @@ -249,7 +250,6 @@ mod tests { assert_optimized_plan_with_rules, test_table_scan, test_table_scan_fields, test_table_scan_with_name, }; - use crate::OptimizerContext; use super::*; diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index ea0980ad4e1c..552d6ae2da6c 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -28,8 +28,8 @@ use datafusion_common::tree_node::{ Transformed, TransformedResult, TreeNode, TreeNodeRecursion, }; use datafusion_common::{ - assert_eq_or_internal_err, assert_or_internal_err, internal_err, plan_err, - qualified_name, Column, DFSchema, Result, + Column, DFSchema, Result, assert_eq_or_internal_err, assert_or_internal_err, + internal_err, plan_err, qualified_name, }; use datafusion_expr::expr::WindowFunction; use datafusion_expr::expr_rewriter::replace_col; @@ -38,7 +38,7 @@ use datafusion_expr::utils::{ conjunction, expr_to_columns, split_conjunction, split_conjunction_owned, }; use datafusion_expr::{ - and, or, BinaryExpr, Expr, Filter, Operator, Projection, TableProviderFilterPushDown, + BinaryExpr, Expr, Filter, Operator, Projection, TableProviderFilterPushDown, and, or, }; use crate::optimizer::ApplyOrder; @@ -1435,17 +1435,17 @@ mod tests { use datafusion_expr::expr::{ScalarFunction, WindowFunction}; use datafusion_expr::logical_plan::table_scan; use datafusion_expr::{ - col, in_list, in_subquery, lit, ColumnarValue, ExprFunctionExt, Extension, - LogicalPlanBuilder, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, - TableSource, TableType, UserDefinedLogicalNodeCore, Volatility, - WindowFunctionDefinition, + ColumnarValue, ExprFunctionExt, Extension, LogicalPlanBuilder, + ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, TableSource, TableType, + UserDefinedLogicalNodeCore, Volatility, WindowFunctionDefinition, col, in_list, + in_subquery, lit, }; + use crate::OptimizerContext; use crate::assert_optimized_plan_eq_snapshot; use crate::optimizer::Optimizer; use crate::simplify_expressions::SimplifyExpressions; use crate::test::*; - use crate::OptimizerContext; use datafusion_expr::test::function_stub::sum; use insta::assert_snapshot; diff --git a/datafusion/optimizer/src/push_down_limit.rs b/datafusion/optimizer/src/push_down_limit.rs index 80d4a2de6679..dbd8e1892949 100644 --- a/datafusion/optimizer/src/push_down_limit.rs +++ b/datafusion/optimizer/src/push_down_limit.rs @@ -23,11 +23,11 @@ use std::sync::Arc; use crate::optimizer::ApplyOrder; use crate::{OptimizerConfig, OptimizerRule}; +use datafusion_common::Result; use datafusion_common::tree_node::Transformed; use datafusion_common::utils::combine_limit; -use datafusion_common::Result; use datafusion_expr::logical_plan::{Join, JoinType, Limit, LogicalPlan}; -use datafusion_expr::{lit, FetchType, SkipType}; +use datafusion_expr::{FetchType, SkipType, lit}; /// Optimization rule that tries to push down `LIMIT`. //. It will push down through projection, limits (taking the smaller limit) @@ -281,8 +281,8 @@ mod test { use crate::OptimizerContext; use datafusion_common::DFSchemaRef; use datafusion_expr::{ - col, exists, logical_plan::builder::LogicalPlanBuilder, Expr, Extension, - UserDefinedLogicalNodeCore, + Expr, Extension, UserDefinedLogicalNodeCore, col, exists, + logical_plan::builder::LogicalPlanBuilder, }; use datafusion_functions_aggregate::expr_fn::max; diff --git a/datafusion/optimizer/src/replace_distinct_aggregate.rs b/datafusion/optimizer/src/replace_distinct_aggregate.rs index 215f5e240d5d..3aeae8287750 100644 --- a/datafusion/optimizer/src/replace_distinct_aggregate.rs +++ b/datafusion/optimizer/src/replace_distinct_aggregate.rs @@ -25,8 +25,8 @@ use datafusion_common::tree_node::Transformed; use datafusion_common::{Column, Result}; use datafusion_expr::expr_rewriter::normalize_cols; use datafusion_expr::utils::expand_wildcard; -use datafusion_expr::{col, lit, ExprFunctionExt, Limit, LogicalPlanBuilder}; use datafusion_expr::{Aggregate, Distinct, DistinctOn, Expr, LogicalPlan}; +use datafusion_expr::{ExprFunctionExt, Limit, LogicalPlanBuilder, col, lit}; /// Optimizer that replaces logical [[Distinct]] with a logical [[Aggregate]] /// @@ -214,7 +214,7 @@ mod tests { use crate::OptimizerContext; use datafusion_common::Result; use datafusion_expr::{ - col, logical_plan::builder::LogicalPlanBuilder, table_scan, Expr, + Expr, col, logical_plan::builder::LogicalPlanBuilder, table_scan, }; use datafusion_functions_aggregate::sum::sum; diff --git a/datafusion/optimizer/src/scalar_subquery_to_join.rs b/datafusion/optimizer/src/scalar_subquery_to_join.rs index 2df1be1b7f0b..8a4569e45547 100644 --- a/datafusion/optimizer/src/scalar_subquery_to_join.rs +++ b/datafusion/optimizer/src/scalar_subquery_to_join.rs @@ -30,11 +30,11 @@ use datafusion_common::alias::AliasGenerator; use datafusion_common::tree_node::{ Transformed, TransformedResult, TreeNode, TreeNodeRecursion, TreeNodeRewriter, }; -use datafusion_common::{assert_or_internal_err, plan_err, Column, Result, ScalarValue}; +use datafusion_common::{Column, Result, ScalarValue, assert_or_internal_err, plan_err}; use datafusion_expr::expr_rewriter::create_col_from_scalar_expr; use datafusion_expr::logical_plan::{JoinType, Subquery}; use datafusion_expr::utils::conjunction; -use datafusion_expr::{expr, EmptyRelation, Expr, LogicalPlan, LogicalPlanBuilder}; +use datafusion_expr::{EmptyRelation, Expr, LogicalPlan, LogicalPlanBuilder, expr}; /// Optimizer rule for rewriting subquery filters to joins /// and places additional projection on top of the filter, to preserve @@ -166,29 +166,25 @@ impl OptimizerRule for ScalarSubqueryToJoin { build_join(&subquery, &cur_input, &alias)? { cur_input = optimized_subquery; - if !expr_check_map.is_empty() { - if let Some(expr) = subquery_to_expr_map.get(&subquery) { - if let Some(rewrite_expr) = - expr_to_rewrite_expr_map.get(expr) - { - let new_expr = rewrite_expr - .clone() - .transform_up(|expr| { - // replace column references with entry in map, if it exists - if let Some(map_expr) = - expr.try_as_col().and_then(|col| { - expr_check_map.get(&col.name) - }) - { - Ok(Transformed::yes(map_expr.clone())) - } else { - Ok(Transformed::no(expr)) - } - }) - .data()?; - expr_to_rewrite_expr_map.insert(expr, new_expr); - } - } + if !expr_check_map.is_empty() + && let Some(expr) = subquery_to_expr_map.get(&subquery) + && let Some(rewrite_expr) = expr_to_rewrite_expr_map.get(expr) + { + let new_expr = rewrite_expr + .clone() + .transform_up(|expr| { + // replace column references with entry in map, if it exists + if let Some(map_expr) = expr + .try_as_col() + .and_then(|col| expr_check_map.get(&col.name)) + { + Ok(Transformed::yes(map_expr.clone())) + } else { + Ok(Transformed::no(expr)) + } + }) + .data()?; + expr_to_rewrite_expr_map.insert(expr, new_expr); } } else { // if we can't handle all of the subqueries then bail for now @@ -417,7 +413,7 @@ mod tests { use datafusion_expr::test::function_stub::sum; use crate::assert_optimized_plan_eq_display_indent_snapshot; - use datafusion_expr::{col, lit, out_ref_col, scalar_subquery, Between}; + use datafusion_expr::{Between, col, lit, out_ref_col, scalar_subquery}; use datafusion_functions_aggregate::min_max::{max, min}; macro_rules! assert_optimized_plan_equal { diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 366c99ce8f28..e4952ada4794 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -18,7 +18,7 @@ //! Expression simplification API use arrow::{ - array::{new_null_array, AsArray}, + array::{AsArray, new_null_array}, datatypes::{DataType, Field, Schema}, record_batch::RecordBatch, }; @@ -28,36 +28,36 @@ use std::ops::Not; use std::sync::Arc; use datafusion_common::{ + DFSchema, DataFusionError, Result, ScalarValue, exec_datafusion_err, internal_err, +}; +use datafusion_common::{ + HashMap, cast::{as_large_list_array, as_list_array}, metadata::FieldMetadata, tree_node::{Transformed, TransformedResult, TreeNode, TreeNodeRewriter}, - HashMap, -}; -use datafusion_common::{ - exec_datafusion_err, internal_err, DFSchema, DataFusionError, Result, ScalarValue, }; use datafusion_expr::{ - and, binary::BinaryTypeCoercer, lit, or, BinaryExpr, Case, ColumnarValue, Expr, Like, - Operator, Volatility, + BinaryExpr, Case, ColumnarValue, Expr, Like, Operator, Volatility, and, + binary::BinaryTypeCoercer, lit, or, }; +use datafusion_expr::{Cast, TryCast, simplify::ExprSimplifyResult}; use datafusion_expr::{expr::ScalarFunction, interval_arithmetic::NullableInterval}; use datafusion_expr::{ expr::{InList, InSubquery}, utils::{iter_conjunction, iter_conjunction_owned}, }; -use datafusion_expr::{simplify::ExprSimplifyResult, Cast, TryCast}; use datafusion_physical_expr::{create_physical_expr, execution_props::ExecutionProps}; use super::inlist_simplifier::ShortenInListSimplifier; use super::utils::*; use crate::analyzer::type_coercion::TypeCoercionRewriter; +use crate::simplify_expressions::SimplifyInfo; use crate::simplify_expressions::regex::simplify_regex_expr; use crate::simplify_expressions::unwrap_cast::{ is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary, is_cast_expr_and_support_unwrap_cast_in_comparison_for_inlist, unwrap_cast_in_comparison_for_binary, }; -use crate::simplify_expressions::SimplifyInfo; use datafusion_expr::expr_rewriter::rewrite_with_guarantees_map; use datafusion_expr_common::casts::try_cast_literal_to_type; use indexmap::IndexSet; @@ -580,10 +580,9 @@ impl TreeNodeRewriter for ConstEvaluator<'_> { // This provides clearer error messages and fails fast. if let Expr::Cast(Cast { ref expr, .. }) | Expr::TryCast(TryCast { ref expr, .. }) = expr + && matches!(expr.as_ref(), Expr::Literal(_, _)) { - if matches!(expr.as_ref(), Expr::Literal(_, _)) { - return Err(err); - } + return Err(err); } // For other expressions (like CASE, COALESCE), preserve the original // to allow short-circuit evaluation at execution time @@ -710,7 +709,10 @@ impl<'a> ConstEvaluator<'a> { ColumnarValue::Array(a) => { if a.len() != 1 { ConstSimplifyResult::SimplifyRuntimeError( - exec_datafusion_err!("Could not evaluate the expression, found a result of length {}", a.len()), + exec_datafusion_err!( + "Could not evaluate the expression, found a result of length {}", + a.len() + ), expr, ) } else if as_list_array(&a).is_ok() { @@ -1050,7 +1052,9 @@ impl TreeNodeRewriter for Simplifier<'_, S> { right: left_right, })) } else { - return internal_err!("can_reduce_to_equal_statement should only be called with a BinaryExpr"); + return internal_err!( + "can_reduce_to_equal_statement should only be called with a BinaryExpr" + ); } } @@ -2159,7 +2163,7 @@ mod tests { use crate::simplify_expressions::SimplifyContext; use crate::test::test_table_scan_with_name; use arrow::datatypes::FieldRef; - use datafusion_common::{assert_contains, DFSchemaRef, ToDFSchema}; + use datafusion_common::{DFSchemaRef, ToDFSchema, assert_contains}; use datafusion_expr::{ expr::WindowFunction, function::{ diff --git a/datafusion/optimizer/src/simplify_expressions/inlist_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/inlist_simplifier.rs index a1c1dc17d294..17112d4f0ae2 100644 --- a/datafusion/optimizer/src/simplify_expressions/inlist_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/inlist_simplifier.rs @@ -19,10 +19,10 @@ use super::THRESHOLD_INLINE_INLIST; -use datafusion_common::tree_node::{Transformed, TreeNodeRewriter}; use datafusion_common::Result; -use datafusion_expr::expr::InList; +use datafusion_common::tree_node::{Transformed, TreeNodeRewriter}; use datafusion_expr::Expr; +use datafusion_expr::expr::InList; pub(super) struct ShortenInListSimplifier {} @@ -43,52 +43,50 @@ impl TreeNodeRewriter for ShortenInListSimplifier { ref list, negated, }) = expr + && !list.is_empty() + && ( + // For lists with only 1 value we allow more complex expressions to be simplified + // e.g SUBSTR(c1, 2, 3) IN ('1') -> SUBSTR(c1, 2, 3) = '1' + // for more than one we avoid repeating this potentially expensive + // expressions + list.len() == 1 + || list.len() <= THRESHOLD_INLINE_INLIST + && expr.try_as_col().is_some() + ) { - if !list.is_empty() - && ( - // For lists with only 1 value we allow more complex expressions to be simplified - // e.g SUBSTR(c1, 2, 3) IN ('1') -> SUBSTR(c1, 2, 3) = '1' - // for more than one we avoid repeating this potentially expensive - // expressions - list.len() == 1 - || list.len() <= THRESHOLD_INLINE_INLIST - && expr.try_as_col().is_some() - ) - { - let first_val = list[0].clone(); - if negated { - return Ok(Transformed::yes(list.iter().skip(1).cloned().fold( - (*expr.clone()).not_eq(first_val), - |acc, y| { - // Note that `A and B and C and D` is a left-deep tree structure - // as such we want to maintain this structure as much as possible - // to avoid reordering the expression during each optimization - // pass. - // - // Left-deep tree structure for `A and B and C and D`: - // ``` - // & - // / \ - // & D - // / \ - // & C - // / \ - // A B - // ``` - // - // The code below maintain the left-deep tree structure. - acc.and((*expr.clone()).not_eq(y)) - }, - ))); - } else { - return Ok(Transformed::yes(list.iter().skip(1).cloned().fold( - (*expr.clone()).eq(first_val), - |acc, y| { - // Same reasoning as above - acc.or((*expr.clone()).eq(y)) - }, - ))); - } + let first_val = list[0].clone(); + if negated { + return Ok(Transformed::yes(list.iter().skip(1).cloned().fold( + (*expr.clone()).not_eq(first_val), + |acc, y| { + // Note that `A and B and C and D` is a left-deep tree structure + // as such we want to maintain this structure as much as possible + // to avoid reordering the expression during each optimization + // pass. + // + // Left-deep tree structure for `A and B and C and D`: + // ``` + // & + // / \ + // & D + // / \ + // & C + // / \ + // A B + // ``` + // + // The code below maintain the left-deep tree structure. + acc.and((*expr.clone()).not_eq(y)) + }, + ))); + } else { + return Ok(Transformed::yes(list.iter().skip(1).cloned().fold( + (*expr.clone()).eq(first_val), + |acc, y| { + // Same reasoning as above + acc.or((*expr.clone()).eq(y)) + }, + ))); } } diff --git a/datafusion/optimizer/src/simplify_expressions/regex.rs b/datafusion/optimizer/src/simplify_expressions/regex.rs index 82c5ea3d8d82..d388aaf74cda 100644 --- a/datafusion/optimizer/src/simplify_expressions/regex.rs +++ b/datafusion/optimizer/src/simplify_expressions/regex.rs @@ -16,7 +16,7 @@ // under the License. use datafusion_common::{DataFusionError, Result, ScalarValue}; -use datafusion_expr::{lit, BinaryExpr, Expr, Like, Operator}; +use datafusion_expr::{BinaryExpr, Expr, Like, Operator, lit}; use regex_syntax::hir::{Capture, Hir, HirKind, Literal, Look}; /// Maximum number of regex alternations (`foo|bar|...`) that will be expanded into multiple `LIKE` expressions. @@ -68,10 +68,10 @@ pub fn simplify_regex_expr( Ok(hir) => { let kind = hir.kind(); if let HirKind::Alternation(alts) = kind { - if alts.len() <= MAX_REGEX_ALTERNATIONS_EXPANSION { - if let Some(expr) = lower_alt(&mode, &left, alts) { - return Ok(expr); - } + if alts.len() <= MAX_REGEX_ALTERNATIONS_EXPANSION + && let Some(expr) = lower_alt(&mode, &left, alts) + { + return Ok(expr); } } else if let Some(expr) = lower_simple(&mode, &left, &hir) { return Ok(expr); @@ -287,11 +287,11 @@ fn anchored_alternation_to_exprs(v: &[Hir]) -> Option> { let mut literals = Vec::with_capacity(alters.len()); for hir in alters { let mut is_safe = false; - if let HirKind::Literal(l) = hir.kind() { - if let Some(safe_literal) = str_from_literal(l).map(lit) { - literals.push(safe_literal); - is_safe = true; - } + if let HirKind::Literal(l) = hir.kind() + && let Some(safe_literal) = str_from_literal(l).map(lit) + { + literals.push(safe_literal); + is_safe = true; } if !is_safe { diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs index 4faf9389cfac..fcb8ba891bb9 100644 --- a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs +++ b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs @@ -21,11 +21,11 @@ use std::sync::Arc; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::{DFSchema, DFSchemaRef, DataFusionError, Result}; +use datafusion_expr::Expr; use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::logical_plan::LogicalPlan; use datafusion_expr::simplify::SimplifyContext; use datafusion_expr::utils::merge_schema; -use datafusion_expr::Expr; use crate::optimizer::ApplyOrder; use crate::utils::NamePreserver; @@ -161,9 +161,9 @@ mod tests { use datafusion_expr::*; use datafusion_functions_aggregate::expr_fn::{max, min}; + use crate::OptimizerContext; use crate::assert_optimized_plan_eq_snapshot; use crate::test::{assert_fields_eq, test_table_scan_with_name}; - use crate::OptimizerContext; use super::*; @@ -492,8 +492,7 @@ mod tests { .build()?; let actual = get_optimized_plan_formatted(plan, &time); - let expected = - "Projection: NOT test.a AS Boolean(true) OR Boolean(false) != test.a\ + let expected = "Projection: NOT test.a AS Boolean(true) OR Boolean(false) != test.a\ \n TableScan: test"; assert_eq!(expected, actual); diff --git a/datafusion/optimizer/src/simplify_expressions/unwrap_cast.rs b/datafusion/optimizer/src/simplify_expressions/unwrap_cast.rs index b1f3b006e0cf..b2349db8c460 100644 --- a/datafusion/optimizer/src/simplify_expressions/unwrap_cast.rs +++ b/datafusion/optimizer/src/simplify_expressions/unwrap_cast.rs @@ -55,10 +55,10 @@ //! ``` use arrow::datatypes::DataType; -use datafusion_common::{internal_err, tree_node::Transformed}; use datafusion_common::{Result, ScalarValue}; -use datafusion_expr::{lit, BinaryExpr}; -use datafusion_expr::{simplify::SimplifyInfo, Cast, Expr, Operator, TryCast}; +use datafusion_common::{internal_err, tree_node::Transformed}; +use datafusion_expr::{BinaryExpr, lit}; +use datafusion_expr::{Cast, Expr, Operator, TryCast, simplify::SimplifyInfo}; use datafusion_expr_common::casts::{is_supported_type, try_cast_literal_to_type}; pub(super) fn unwrap_cast_in_comparison_for_binary( diff --git a/datafusion/optimizer/src/simplify_expressions/utils.rs b/datafusion/optimizer/src/simplify_expressions/utils.rs index 35e256f3064e..1f214e3d365c 100644 --- a/datafusion/optimizer/src/simplify_expressions/utils.rs +++ b/datafusion/optimizer/src/simplify_expressions/utils.rs @@ -18,11 +18,11 @@ //! Utility functions for expression simplification use arrow::datatypes::i256; -use datafusion_common::{internal_err, Result, ScalarValue}; +use datafusion_common::{Result, ScalarValue, internal_err}; use datafusion_expr::{ + Case, Expr, Like, Operator, expr::{Between, BinaryExpr, InList}, expr_fn::{and, bitwise_and, bitwise_or, or}, - Case, Expr, Like, Operator, }; pub static POWS_OF_TEN: [i128; 38] = [ diff --git a/datafusion/optimizer/src/single_distinct_to_groupby.rs b/datafusion/optimizer/src/single_distinct_to_groupby.rs index 8eb4ae3976f9..3ad56820f029 100644 --- a/datafusion/optimizer/src/single_distinct_to_groupby.rs +++ b/datafusion/optimizer/src/single_distinct_to_groupby.rs @@ -23,15 +23,14 @@ use crate::optimizer::ApplyOrder; use crate::{OptimizerConfig, OptimizerRule}; use datafusion_common::{ - assert_eq_or_internal_err, tree_node::Transformed, DataFusionError, HashSet, Result, + DataFusionError, HashSet, Result, assert_eq_or_internal_err, tree_node::Transformed, }; use datafusion_expr::builder::project; use datafusion_expr::expr::AggregateFunctionParams; use datafusion_expr::{ - col, + Expr, col, expr::AggregateFunction, logical_plan::{Aggregate, LogicalPlan}, - Expr, }; /// single distinct to group by optimizer rule @@ -288,8 +287,8 @@ mod tests { use super::*; use crate::assert_optimized_plan_eq_display_indent_snapshot; use crate::test::*; - use datafusion_expr::expr::GroupingSet; use datafusion_expr::ExprFunctionExt; + use datafusion_expr::expr::GroupingSet; use datafusion_expr::{lit, logical_plan::builder::LogicalPlanBuilder}; use datafusion_functions_aggregate::count::count_udaf; use datafusion_functions_aggregate::expr_fn::{count, count_distinct, max, min, sum}; diff --git a/datafusion/optimizer/src/test/mod.rs b/datafusion/optimizer/src/test/mod.rs index 6e0b734bb928..a45983950496 100644 --- a/datafusion/optimizer/src/test/mod.rs +++ b/datafusion/optimizer/src/test/mod.rs @@ -20,8 +20,8 @@ use crate::optimizer::Optimizer; use crate::{OptimizerContext, OptimizerRule}; use arrow::datatypes::{DataType, Field, Schema}; use datafusion_common::config::ConfigOptions; -use datafusion_common::{assert_contains, Result}; -use datafusion_expr::{logical_plan::table_scan, LogicalPlan, LogicalPlanBuilder}; +use datafusion_common::{Result, assert_contains}; +use datafusion_expr::{LogicalPlan, LogicalPlanBuilder, logical_plan::table_scan}; use std::sync::Arc; pub mod user_defined; diff --git a/datafusion/optimizer/src/test/user_defined.rs b/datafusion/optimizer/src/test/user_defined.rs index a39f90b5da5d..878ce274d5ed 100644 --- a/datafusion/optimizer/src/test/user_defined.rs +++ b/datafusion/optimizer/src/test/user_defined.rs @@ -19,8 +19,8 @@ use datafusion_common::DFSchemaRef; use datafusion_expr::{ - logical_plan::{Extension, UserDefinedLogicalNodeCore}, Expr, LogicalPlan, + logical_plan::{Extension, UserDefinedLogicalNodeCore}, }; use std::{ fmt::{self, Debug}, diff --git a/datafusion/optimizer/src/utils.rs b/datafusion/optimizer/src/utils.rs index 81763fa0552f..7e038d239202 100644 --- a/datafusion/optimizer/src/utils.rs +++ b/datafusion/optimizer/src/utils.rs @@ -20,14 +20,14 @@ use std::collections::{BTreeSet, HashMap, HashSet}; use crate::analyzer::type_coercion::TypeCoercionRewriter; -use arrow::array::{new_null_array, Array, RecordBatch}; +use arrow::array::{Array, RecordBatch, new_null_array}; use arrow::datatypes::{DataType, Field, Schema}; use datafusion_common::cast::as_boolean_array; use datafusion_common::tree_node::{TransformedResult, TreeNode}; use datafusion_common::{Column, DFSchema, Result, ScalarValue}; use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::expr_rewriter::replace_col; -use datafusion_expr::{logical_plan::LogicalPlan, ColumnarValue, Expr}; +use datafusion_expr::{ColumnarValue, Expr, logical_plan::LogicalPlan}; use datafusion_physical_expr::create_physical_expr; use log::{debug, trace}; use std::sync::Arc; @@ -154,7 +154,7 @@ fn coerce(expr: Expr, schema: &DFSchema) -> Result { #[cfg(test)] mod tests { use super::*; - use datafusion_expr::{binary_expr, case, col, in_list, is_null, lit, Operator}; + use datafusion_expr::{Operator, binary_expr, case, col, in_list, is_null, lit}; #[test] fn expr_is_restrict_null_predicate() -> Result<()> { diff --git a/datafusion/optimizer/tests/optimizer_integration.rs b/datafusion/optimizer/tests/optimizer_integration.rs index 180d85be20fb..951d78bc899c 100644 --- a/datafusion/optimizer/tests/optimizer_integration.rs +++ b/datafusion/optimizer/tests/optimizer_integration.rs @@ -22,7 +22,7 @@ use std::sync::Arc; use arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit}; use datafusion_common::config::ConfigOptions; -use datafusion_common::{plan_err, Result, TableReference}; +use datafusion_common::{Result, TableReference, plan_err}; use datafusion_expr::planner::ExprPlanner; use datafusion_expr::test::function_stub::sum_udaf; use datafusion_expr::{AggregateUDF, LogicalPlan, ScalarUDF, TableSource, WindowUDF}; diff --git a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml index b4a212b9d8cd..4908510a20c7 100644 --- a/datafusion/physical-plan/Cargo.toml +++ b/datafusion/physical-plan/Cargo.toml @@ -21,7 +21,7 @@ description = "Physical (ExecutionPlan) implementations for DataFusion query eng keywords = ["arrow", "query", "sql"] readme = "README.md" version = { workspace = true } -edition = { workspace = true } +edition = "2024" homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } diff --git a/datafusion/physical-plan/benches/aggregate_vectorized.rs b/datafusion/physical-plan/benches/aggregate_vectorized.rs index 66e7a28a28b4..a93088a4ebe7 100644 --- a/datafusion/physical-plan/benches/aggregate_vectorized.rs +++ b/datafusion/physical-plan/benches/aggregate_vectorized.rs @@ -25,11 +25,11 @@ use arrow::util::test_util::seedable_rng; use arrow_schema::DataType; use criterion::measurement::WallTime; use criterion::{ - criterion_group, criterion_main, BenchmarkGroup, BenchmarkId, Criterion, + BenchmarkGroup, BenchmarkId, Criterion, criterion_group, criterion_main, }; +use datafusion_physical_plan::aggregates::group_values::multi_group_by::GroupColumn; use datafusion_physical_plan::aggregates::group_values::multi_group_by::bytes_view::ByteViewGroupValueBuilder; use datafusion_physical_plan::aggregates::group_values::multi_group_by::primitive::PrimitiveGroupValueBuilder; -use datafusion_physical_plan::aggregates::group_values::multi_group_by::GroupColumn; use rand::distr::{Bernoulli, Distribution}; use std::hint::black_box; use std::sync::Arc; diff --git a/datafusion/physical-plan/benches/partial_ordering.rs b/datafusion/physical-plan/benches/partial_ordering.rs index e1a9d0b583e9..bdadd6274b75 100644 --- a/datafusion/physical-plan/benches/partial_ordering.rs +++ b/datafusion/physical-plan/benches/partial_ordering.rs @@ -20,7 +20,7 @@ use std::sync::Arc; use arrow::array::{ArrayRef, Int32Array}; use datafusion_physical_plan::aggregates::order::GroupOrderingPartial; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{Criterion, criterion_group, criterion_main}; const BATCH_SIZE: usize = 8192; diff --git a/datafusion/physical-plan/benches/sort_preserving_merge.rs b/datafusion/physical-plan/benches/sort_preserving_merge.rs index f223fd806b69..76ebf230a30e 100644 --- a/datafusion/physical-plan/benches/sort_preserving_merge.rs +++ b/datafusion/physical-plan/benches/sort_preserving_merge.rs @@ -20,9 +20,9 @@ use arrow::{ record_batch::RecordBatch, }; use arrow_schema::{SchemaRef, SortOptions}; -use criterion::{criterion_group, criterion_main, BatchSize, Criterion}; +use criterion::{BatchSize, Criterion, criterion_group, criterion_main}; use datafusion_execution::TaskContext; -use datafusion_physical_expr::{expressions::col, LexOrdering, PhysicalSortExpr}; +use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr, expressions::col}; use datafusion_physical_plan::test::TestMemoryExec; use datafusion_physical_plan::{ collect, sorts::sort_preserving_merge::SortPreservingMergeExec, diff --git a/datafusion/physical-plan/benches/spill_io.rs b/datafusion/physical-plan/benches/spill_io.rs index fbf190518d07..fac2547a131b 100644 --- a/datafusion/physical-plan/benches/spill_io.rs +++ b/datafusion/physical-plan/benches/spill_io.rs @@ -22,15 +22,15 @@ use arrow::array::{ use arrow::datatypes::{DataType, Field, Schema}; use criterion::measurement::WallTime; use criterion::{ - criterion_group, criterion_main, BatchSize, BenchmarkGroup, BenchmarkId, Criterion, + BatchSize, BenchmarkGroup, BenchmarkId, Criterion, criterion_group, criterion_main, }; use datafusion_common::config::SpillCompression; use datafusion_common::human_readable_size; use datafusion_common::instant::Instant; use datafusion_execution::runtime_env::RuntimeEnv; +use datafusion_physical_plan::SpillManager; use datafusion_physical_plan::common::collect; use datafusion_physical_plan::metrics::{ExecutionPlanMetricsSet, SpillMetrics}; -use datafusion_physical_plan::SpillManager; use rand::{Rng, SeedableRng}; use std::sync::Arc; use tokio::runtime::Runtime; diff --git a/datafusion/physical-plan/src/aggregates/group_values/metrics.rs b/datafusion/physical-plan/src/aggregates/group_values/metrics.rs index c4e29ea71060..b6c32204e85f 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/metrics.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/metrics.rs @@ -53,7 +53,7 @@ mod tests { use crate::aggregates::{AggregateExec, AggregateMode, PhysicalGroupBy}; use crate::metrics::MetricsSet; use crate::test::TestMemoryExec; - use crate::{collect, ExecutionPlan}; + use crate::{ExecutionPlan, collect}; use arrow::array::{Float64Array, UInt32Array}; use arrow::datatypes::{DataType, Field, Schema}; use arrow::record_batch::RecordBatch; diff --git a/datafusion/physical-plan/src/aggregates/group_values/mod.rs b/datafusion/physical-plan/src/aggregates/group_values/mod.rs index 4bd7f03506a1..f419328d1125 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/mod.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/mod.rs @@ -22,7 +22,7 @@ use arrow::array::types::{ Time64MicrosecondType, Time64NanosecondType, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, }; -use arrow::array::{downcast_primitive, ArrayRef, RecordBatch}; +use arrow::array::{ArrayRef, RecordBatch, downcast_primitive}; use arrow::datatypes::{DataType, SchemaRef, TimeUnit}; use datafusion_common::Result; diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs index 03e26446f575..91a39f28f33c 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs @@ -18,7 +18,7 @@ use std::sync::Arc; use crate::aggregates::group_values::multi_group_by::Nulls; -use crate::aggregates::group_values::multi_group_by::{nulls_equal_to, GroupColumn}; +use crate::aggregates::group_values::multi_group_by::{GroupColumn, nulls_equal_to}; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::{Array as _, ArrayRef, AsArray, BooleanArray, BooleanBufferBuilder}; use datafusion_common::Result; diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs index d52721c2ee6c..cd173741b646 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs @@ -16,18 +16,18 @@ // under the License. use crate::aggregates::group_values::multi_group_by::{ - nulls_equal_to, GroupColumn, Nulls, + GroupColumn, Nulls, nulls_equal_to, }; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::{ - types::GenericStringType, Array, ArrayRef, AsArray, BufferBuilder, - GenericBinaryArray, GenericByteArray, GenericStringArray, OffsetSizeTrait, + Array, ArrayRef, AsArray, BufferBuilder, GenericBinaryArray, GenericByteArray, + GenericStringArray, OffsetSizeTrait, types::GenericStringType, }; use arrow::buffer::{OffsetBuffer, ScalarBuffer}; use arrow::datatypes::{ByteArrayType, DataType, GenericBinaryType}; use datafusion_common::utils::proxy::VecAllocExt; -use datafusion_common::{exec_datafusion_err, Result}; -use datafusion_physical_expr_common::binary_map::{OutputType, INITIAL_BUFFER_CAPACITY}; +use datafusion_common::{Result, exec_datafusion_err}; +use datafusion_physical_expr_common::binary_map::{INITIAL_BUFFER_CAPACITY, OutputType}; use itertools::izip; use std::mem::size_of; use std::sync::Arc; diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs index fde477c2cf7b..31a152aa7417 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs @@ -16,10 +16,10 @@ // under the License. use crate::aggregates::group_values::multi_group_by::{ - nulls_equal_to, GroupColumn, Nulls, + GroupColumn, Nulls, nulls_equal_to, }; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; -use arrow::array::{make_view, Array, ArrayRef, AsArray, ByteView, GenericByteViewArray}; +use arrow::array::{Array, ArrayRef, AsArray, ByteView, GenericByteViewArray, make_view}; use arrow::buffer::{Buffer, ScalarBuffer}; use arrow::datatypes::ByteViewType; use datafusion_common::Result; diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs index 9adf028eca7f..b62bc11aff01 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs @@ -24,24 +24,24 @@ pub mod primitive; use std::mem::{self, size_of}; +use crate::aggregates::group_values::GroupValues; use crate::aggregates::group_values::multi_group_by::{ boolean::BooleanGroupValueBuilder, bytes::ByteGroupValueBuilder, bytes_view::ByteViewGroupValueBuilder, primitive::PrimitiveGroupValueBuilder, }; -use crate::aggregates::group_values::GroupValues; use ahash::RandomState; use arrow::array::{Array, ArrayRef, RecordBatch}; use arrow::compute::cast; use arrow::datatypes::{ BinaryViewType, DataType, Date32Type, Date64Type, Decimal128Type, Float32Type, - Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, Schema, SchemaRef, + Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, Schema, SchemaRef, StringViewType, Time32MillisecondType, Time32SecondType, Time64MicrosecondType, Time64NanosecondType, TimeUnit, TimestampMicrosecondType, TimestampMillisecondType, - TimestampNanosecondType, TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, - UInt8Type, + TimestampNanosecondType, TimestampSecondType, UInt8Type, UInt16Type, UInt32Type, + UInt64Type, }; use datafusion_common::hash_utils::create_hashes; -use datafusion_common::{internal_datafusion_err, not_impl_err, Result}; +use datafusion_common::{Result, internal_datafusion_err, not_impl_err}; use datafusion_execution::memory_pool::proxy::{HashTableAllocExt, VecAllocExt}; use datafusion_expr::EmitTo; use datafusion_physical_expr::binary_map::OutputType; @@ -1048,7 +1048,7 @@ impl GroupValues for GroupValuesColumn { } } dt => { - return not_impl_err!("{dt} not supported in GroupValuesColumn") + return not_impl_err!("{dt} not supported in GroupValuesColumn"); } } } @@ -1261,7 +1261,7 @@ mod tests { use datafusion_expr::EmitTo; use crate::aggregates::group_values::{ - multi_group_by::GroupValuesColumn, GroupValues, + GroupValues, multi_group_by::GroupValuesColumn, }; use super::GroupIndexView; diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs index df2cf4bdecce..31126348b3fd 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs @@ -16,11 +16,11 @@ // under the License. use crate::aggregates::group_values::multi_group_by::{ - nulls_equal_to, GroupColumn, Nulls, + GroupColumn, Nulls, nulls_equal_to, }; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::ArrowNativeTypeOp; -use arrow::array::{cast::AsArray, Array, ArrayRef, ArrowPrimitiveType, PrimitiveArray}; +use arrow::array::{Array, ArrayRef, ArrowPrimitiveType, PrimitiveArray, cast::AsArray}; use arrow::buffer::ScalarBuffer; use arrow::datatypes::DataType; use datafusion_common::Result; diff --git a/datafusion/physical-plan/src/aggregates/group_values/row.rs b/datafusion/physical-plan/src/aggregates/group_values/row.rs index d632a7f0ad8a..a5e5c1600602 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/row.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/row.rs @@ -21,8 +21,8 @@ use arrow::array::{Array, ArrayRef, ListArray, RecordBatch, StructArray}; use arrow::compute::cast; use arrow::datatypes::{DataType, SchemaRef}; use arrow::row::{RowConverter, Rows, SortField}; -use datafusion_common::hash_utils::create_hashes; use datafusion_common::Result; +use datafusion_common::hash_utils::create_hashes; use datafusion_execution::memory_pool::proxy::{HashTableAllocExt, VecAllocExt}; use datafusion_expr::EmitTo; use hashbrown::hash_table::HashTable; diff --git a/datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs b/datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs index f35c580b0e63..41d34218f6a0 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs @@ -19,10 +19,10 @@ use crate::aggregates::group_values::GroupValues; use ahash::RandomState; use arrow::array::types::{IntervalDayTime, IntervalMonthDayNano}; use arrow::array::{ - cast::AsArray, ArrayRef, ArrowNativeTypeOp, ArrowPrimitiveType, NullBufferBuilder, - PrimitiveArray, + ArrayRef, ArrowNativeTypeOp, ArrowPrimitiveType, NullBufferBuilder, PrimitiveArray, + cast::AsArray, }; -use arrow::datatypes::{i256, DataType}; +use arrow::datatypes::{DataType, i256}; use arrow::record_batch::RecordBatch; use datafusion_common::Result; use datafusion_execution::memory_pool::proxy::VecAllocExt; diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index f2dd78579444..3dbcce699c24 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -41,23 +41,23 @@ use datafusion_physical_expr::utils::collect_columns; use parking_lot::Mutex; use std::collections::HashSet; -use arrow::array::{ArrayRef, UInt16Array, UInt32Array, UInt64Array, UInt8Array}; +use arrow::array::{ArrayRef, UInt8Array, UInt16Array, UInt32Array, UInt64Array}; use arrow::datatypes::{Field, Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use arrow_schema::FieldRef; use datafusion_common::stats::Precision; use datafusion_common::{ - assert_eq_or_internal_err, not_impl_err, Constraint, Constraints, Result, ScalarValue, + Constraint, Constraints, Result, ScalarValue, assert_eq_or_internal_err, not_impl_err, }; use datafusion_execution::TaskContext; use datafusion_expr::{Accumulator, Aggregate}; use datafusion_physical_expr::aggregate::AggregateFunctionExpr; use datafusion_physical_expr::equivalence::ProjectionMapping; -use datafusion_physical_expr::expressions::{lit, Column, DynamicFilterPhysicalExpr}; +use datafusion_physical_expr::expressions::{Column, DynamicFilterPhysicalExpr, lit}; use datafusion_physical_expr::{ - physical_exprs_contains, ConstExpr, EquivalenceProperties, + ConstExpr, EquivalenceProperties, physical_exprs_contains, }; -use datafusion_physical_expr_common::physical_expr::{fmt_sql, PhysicalExpr}; +use datafusion_physical_expr_common::physical_expr::{PhysicalExpr, fmt_sql}; use datafusion_physical_expr_common::sort_expr::{ LexOrdering, LexRequirement, OrderingRequirements, PhysicalSortRequirement, }; @@ -731,12 +731,12 @@ impl AggregateExec { } // grouping by an expression that has a sort/limit upstream - if let Some(limit) = self.limit { - if !self.is_unordered_unfiltered_group_by_distinct() { - return Ok(StreamType::GroupedPriorityQueue( - GroupedTopKAggregateStream::new(self, context, partition, limit)?, - )); - } + if let Some(limit) = self.limit + && !self.is_unordered_unfiltered_group_by_distinct() + { + return Ok(StreamType::GroupedPriorityQueue( + GroupedTopKAggregateStream::new(self, context, partition, limit)?, + )); } // grouping by something else and we need to just materialize all results @@ -971,15 +971,15 @@ impl AggregateExec { }; // 2. arg should be only 1 column reference - if let [arg] = aggr_expr.expressions().as_slice() { - if arg.as_any().is::() { - all_cols.push(Arc::clone(arg)); - aggr_dyn_filters.push(PerAccumulatorDynFilter { - aggr_type, - aggr_index: i, - shared_bound: Arc::new(Mutex::new(ScalarValue::Null)), - }); - } + if let [arg] = aggr_expr.expressions().as_slice() + && arg.as_any().is::() + { + all_cols.push(Arc::clone(arg)); + aggr_dyn_filters.push(PerAccumulatorDynFilter { + aggr_type, + aggr_index: i, + shared_bound: Arc::new(Mutex::new(ScalarValue::Null)), + }); } } @@ -1315,11 +1315,10 @@ impl ExecutionPlan for AggregateExec { // Include self dynamic filter when it's possible if matches!(phase, FilterPushdownPhase::Post) && config.optimizer.enable_aggregate_dynamic_filter_pushdown + && let Some(self_dyn_filter) = &self.dynamic_filter { - if let Some(self_dyn_filter) = &self.dynamic_filter { - let dyn_filter = Arc::clone(&self_dyn_filter.filter); - child_desc = child_desc.with_self_filter(dyn_filter); - } + let dyn_filter = Arc::clone(&self_dyn_filter.filter); + child_desc = child_desc.with_self_filter(dyn_filter); } Ok(FilterDescription::new().with_child(child_desc)) @@ -1790,6 +1789,7 @@ mod tests { use std::task::{Context, Poll}; use super::*; + use crate::RecordBatchStream; use crate::coalesce_batches::CoalesceBatchesExec; use crate::coalesce_partitions::CoalescePartitionsExec; use crate::common; @@ -1797,19 +1797,18 @@ mod tests { use crate::execution_plan::Boundedness; use crate::expressions::col; use crate::metrics::MetricValue; - use crate::test::assert_is_pending; - use crate::test::exec::{assert_strong_count_converges_to_zero, BlockingExec}; use crate::test::TestMemoryExec; - use crate::RecordBatchStream; + use crate::test::assert_is_pending; + use crate::test::exec::{BlockingExec, assert_strong_count_converges_to_zero}; use arrow::array::{ DictionaryArray, Float32Array, Float64Array, Int32Array, StructArray, UInt32Array, UInt64Array, }; - use arrow::compute::{concat_batches, SortOptions}; + use arrow::compute::{SortOptions, concat_batches}; use arrow::datatypes::{DataType, Int32Type}; use datafusion_common::test_util::{batches_to_sort_string, batches_to_string}; - use datafusion_common::{internal_err, DataFusionError, ScalarValue}; + use datafusion_common::{DataFusionError, ScalarValue, internal_err}; use datafusion_execution::config::SessionConfig; use datafusion_execution::memory_pool::FairSpillPool; use datafusion_execution::runtime_env::RuntimeEnvBuilder; @@ -1819,11 +1818,11 @@ mod tests { use datafusion_functions_aggregate::first_last::{first_value_udaf, last_value_udaf}; use datafusion_functions_aggregate::median::median_udaf; use datafusion_functions_aggregate::sum::sum_udaf; - use datafusion_physical_expr::aggregate::AggregateExprBuilder; - use datafusion_physical_expr::expressions::lit; - use datafusion_physical_expr::expressions::Literal; use datafusion_physical_expr::Partitioning; use datafusion_physical_expr::PhysicalSortExpr; + use datafusion_physical_expr::aggregate::AggregateExprBuilder; + use datafusion_physical_expr::expressions::Literal; + use datafusion_physical_expr::expressions::lit; use futures::{FutureExt, Stream}; use insta::{allow_duplicates, assert_snapshot}; @@ -2895,12 +2894,13 @@ mod tests { ], ); - let aggregates: Vec> = - vec![AggregateExprBuilder::new(count_udaf(), vec![lit(1)]) + let aggregates: Vec> = vec![ + AggregateExprBuilder::new(count_udaf(), vec![lit(1)]) .schema(Arc::clone(&schema)) .alias("1") .build() - .map(Arc::new)?]; + .map(Arc::new)?, + ]; let input_batches = (0..4) .map(|_| { @@ -3016,14 +3016,13 @@ mod tests { "labels".to_string(), )]); - let aggr_expr = vec![AggregateExprBuilder::new( - sum_udaf(), - vec![col("value", &batch.schema())?], - ) - .schema(Arc::clone(&batch.schema())) - .alias(String::from("SUM(value)")) - .build() - .map(Arc::new)?]; + let aggr_expr = vec![ + AggregateExprBuilder::new(sum_udaf(), vec![col("value", &batch.schema())?]) + .schema(Arc::clone(&batch.schema())) + .alias(String::from("SUM(value)")) + .build() + .map(Arc::new)?, + ]; let input = TestMemoryExec::try_new_exec( &[vec![batch.clone()]], @@ -3067,14 +3066,13 @@ mod tests { let group_by = PhysicalGroupBy::new_single(vec![(col("key", &schema)?, "key".to_string())]); - let aggr_expr = - vec![ - AggregateExprBuilder::new(count_udaf(), vec![col("val", &schema)?]) - .schema(Arc::clone(&schema)) - .alias(String::from("COUNT(val)")) - .build() - .map(Arc::new)?, - ]; + let aggr_expr = vec![ + AggregateExprBuilder::new(count_udaf(), vec![col("val", &schema)?]) + .schema(Arc::clone(&schema)) + .alias(String::from("COUNT(val)")) + .build() + .map(Arc::new)?, + ]; let input_data = vec![ RecordBatch::try_new( @@ -3147,14 +3145,13 @@ mod tests { let group_by = PhysicalGroupBy::new_single(vec![(col("key", &schema)?, "key".to_string())]); - let aggr_expr = - vec![ - AggregateExprBuilder::new(count_udaf(), vec![col("val", &schema)?]) - .schema(Arc::clone(&schema)) - .alias(String::from("COUNT(val)")) - .build() - .map(Arc::new)?, - ]; + let aggr_expr = vec![ + AggregateExprBuilder::new(count_udaf(), vec![col("val", &schema)?]) + .schema(Arc::clone(&schema)) + .alias(String::from("COUNT(val)")) + .build() + .map(Arc::new)?, + ]; let input_data = vec![ RecordBatch::try_new( @@ -3233,14 +3230,13 @@ mod tests { Field::new("b", DataType::Float32, false), ])); - let aggr_expr = - vec![ - AggregateExprBuilder::new(count_udaf(), vec![col("a", &input_schema)?]) - .schema(Arc::clone(&input_schema)) - .alias("COUNT(a)") - .build() - .map(Arc::new)?, - ]; + let aggr_expr = vec![ + AggregateExprBuilder::new(count_udaf(), vec![col("a", &input_schema)?]) + .schema(Arc::clone(&input_schema)) + .alias("COUNT(a)") + .build() + .map(Arc::new)?, + ]; let grouping_set = PhysicalGroupBy::new( vec![ @@ -3387,7 +3383,9 @@ mod tests { "Expected spill but SpillCount metric not found or SpillCount was 0." ); } else if !expect_spill && spill_count > 0 { - panic!("Expected no spill but found SpillCount metric with value greater than 0."); + panic!( + "Expected no spill but found SpillCount metric with value greater than 0." + ); } } else { panic!("No metrics returned from the operator; cannot verify spilling."); diff --git a/datafusion/physical-plan/src/aggregates/no_grouping.rs b/datafusion/physical-plan/src/aggregates/no_grouping.rs index 1834357ba6bb..291d68d3195c 100644 --- a/datafusion/physical-plan/src/aggregates/no_grouping.rs +++ b/datafusion/physical-plan/src/aggregates/no_grouping.rs @@ -18,18 +18,18 @@ //! Aggregate without grouping columns use crate::aggregates::{ - aggregate_expressions, create_accumulators, finalize_aggregation, AccumulatorItem, - AggrDynFilter, AggregateMode, DynamicFilterAggregateType, + AccumulatorItem, AggrDynFilter, AggregateMode, DynamicFilterAggregateType, + aggregate_expressions, create_accumulators, finalize_aggregation, }; use crate::metrics::{BaselineMetrics, RecordOutput}; use crate::{RecordBatchStream, SendableRecordBatchStream}; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; -use datafusion_common::{internal_datafusion_err, internal_err, Result, ScalarValue}; +use datafusion_common::{Result, ScalarValue, internal_datafusion_err, internal_err}; use datafusion_execution::TaskContext; use datafusion_expr::Operator; -use datafusion_physical_expr::expressions::{lit, BinaryExpr}; use datafusion_physical_expr::PhysicalExpr; +use datafusion_physical_expr::expressions::{BinaryExpr, lit}; use futures::stream::BoxStream; use std::borrow::Cow; use std::cmp::Ordering; @@ -94,7 +94,9 @@ impl AggregateStreamInner { &self, ) -> Result> { let Some(filter_state) = self.agg_dyn_filter_state.as_ref() else { - return internal_err!("`build_dynamic_filter_from_accumulator_bounds()` is only called when dynamic filter is enabled"); + return internal_err!( + "`build_dynamic_filter_from_accumulator_bounds()` is only called when dynamic filter is enabled" + ); }; let mut predicates: Vec> = diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs index 615ed2817454..1e7757de4aac 100644 --- a/datafusion/physical-plan/src/aggregates/row_hash.rs +++ b/datafusion/physical-plan/src/aggregates/row_hash.rs @@ -21,31 +21,31 @@ use std::sync::Arc; use std::task::{Context, Poll}; use std::vec; -use super::order::GroupOrdering; use super::AggregateExec; -use crate::aggregates::group_values::{new_group_values, GroupByMetrics, GroupValues}; +use super::order::GroupOrdering; +use crate::aggregates::group_values::{GroupByMetrics, GroupValues, new_group_values}; use crate::aggregates::order::GroupOrderingFull; use crate::aggregates::{ - create_schema, evaluate_group_by, evaluate_many, evaluate_optional, AggregateMode, - PhysicalGroupBy, + AggregateMode, PhysicalGroupBy, create_schema, evaluate_group_by, evaluate_many, + evaluate_optional, }; use crate::metrics::{BaselineMetrics, MetricBuilder, RecordOutput}; use crate::sorts::sort::sort_batch; use crate::sorts::streaming_merge::{SortedSpillFile, StreamingMergeBuilder}; use crate::spill::spill_manager::SpillManager; use crate::stream::RecordBatchStreamAdapter; -use crate::{aggregates, metrics, PhysicalExpr}; +use crate::{PhysicalExpr, aggregates, metrics}; use crate::{RecordBatchStream, SendableRecordBatchStream}; use arrow::array::*; use arrow::datatypes::SchemaRef; use datafusion_common::{ - assert_eq_or_internal_err, assert_or_internal_err, internal_err, DataFusionError, - Result, + DataFusionError, Result, assert_eq_or_internal_err, assert_or_internal_err, + internal_err, }; +use datafusion_execution::TaskContext; use datafusion_execution::memory_pool::proxy::VecAllocExt; use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; -use datafusion_execution::TaskContext; use datafusion_expr::{EmitTo, GroupsAccumulator}; use datafusion_physical_expr::aggregate::AggregateFunctionExpr; use datafusion_physical_expr::expressions::Column; @@ -862,7 +862,8 @@ impl Stream for GroupedHashAggregateStream { return Poll::Ready(Some(internal_err!( "AggregateStream was in Done state with {} groups left in hash table. \ This is a bug - all groups should have been emitted before entering Done state.", - self.group_values.len()))); + self.group_values.len() + ))); } // release the memory reservation since sending back output batch itself needs // some memory reservation, so make some room for it. @@ -1228,13 +1229,12 @@ impl GroupedHashAggregateStream { /// /// Returns `Some(ExecutionState)` if the state should be changed, None otherwise. fn switch_to_skip_aggregation(&mut self) -> Result> { - if let Some(probe) = self.skip_aggregation_probe.as_mut() { - if probe.should_skip() { - if let Some(batch) = self.emit(EmitTo::All, false)? { - return Ok(Some(ExecutionState::ProducingOutput(batch))); - }; - } - } + if let Some(probe) = self.skip_aggregation_probe.as_mut() + && probe.should_skip() + && let Some(batch) = self.emit(EmitTo::All, false)? + { + return Ok(Some(ExecutionState::ProducingOutput(batch))); + }; Ok(None) } @@ -1286,8 +1286,8 @@ mod tests { use crate::test::TestMemoryExec; use arrow::array::{Int32Array, Int64Array}; use arrow::datatypes::{DataType, Field, Schema}; - use datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion_execution::TaskContext; + use datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion_functions_aggregate::count::count_udaf; use datafusion_physical_expr::aggregate::AggregateExprBuilder; use datafusion_physical_expr::expressions::col; diff --git a/datafusion/physical-plan/src/aggregates/topk/hash_table.rs b/datafusion/physical-plan/src/aggregates/topk/hash_table.rs index 974aea3b6292..1fae507d9016 100644 --- a/datafusion/physical-plan/src/aggregates/topk/hash_table.rs +++ b/datafusion/physical-plan/src/aggregates/topk/hash_table.rs @@ -22,12 +22,12 @@ use crate::aggregates::topk::heap::Comparable; use ahash::RandomState; use arrow::array::types::{IntervalDayTime, IntervalMonthDayNano}; use arrow::array::{ - builder::PrimitiveBuilder, cast::AsArray, downcast_primitive, Array, ArrayRef, - ArrowPrimitiveType, LargeStringArray, PrimitiveArray, StringArray, StringViewArray, + Array, ArrayRef, ArrowPrimitiveType, LargeStringArray, PrimitiveArray, StringArray, + StringViewArray, builder::PrimitiveBuilder, cast::AsArray, downcast_primitive, }; -use arrow::datatypes::{i256, DataType}; -use datafusion_common::exec_datafusion_err; +use arrow::datatypes::{DataType, i256}; use datafusion_common::Result; +use datafusion_common::exec_datafusion_err; use half::f16; use hashbrown::raw::RawTable; use std::fmt::Debug; @@ -131,20 +131,24 @@ impl ArrowHashTable for StringHashTable { } unsafe fn update_heap_idx(&mut self, mapper: &[(usize, usize)]) { - self.map.update_heap_idx(mapper); + unsafe { + self.map.update_heap_idx(mapper); + } } unsafe fn heap_idx_at(&self, map_idx: usize) -> usize { - self.map.heap_idx_at(map_idx) + unsafe { self.map.heap_idx_at(map_idx) } } unsafe fn take_all(&mut self, indexes: Vec) -> ArrayRef { - let ids = self.map.take_all(indexes); - match self.data_type { - DataType::Utf8 => Arc::new(StringArray::from(ids)), - DataType::LargeUtf8 => Arc::new(LargeStringArray::from(ids)), - DataType::Utf8View => Arc::new(StringViewArray::from(ids)), - _ => unreachable!(), + unsafe { + let ids = self.map.take_all(indexes); + match self.data_type { + DataType::Utf8 => Arc::new(StringArray::from(ids)), + DataType::LargeUtf8 => Arc::new(LargeStringArray::from(ids)), + DataType::Utf8View => Arc::new(StringViewArray::from(ids)), + _ => unreachable!(), + } } } @@ -154,61 +158,63 @@ impl ArrowHashTable for StringHashTable { replace_idx: usize, mapper: &mut Vec<(usize, usize)>, ) -> (usize, bool) { - let id = match self.data_type { - DataType::Utf8 => { - let ids = self - .owned - .as_any() - .downcast_ref::() - .expect("Expected StringArray for DataType::Utf8"); - if ids.is_null(row_idx) { - None - } else { - Some(ids.value(row_idx)) + unsafe { + let id = match self.data_type { + DataType::Utf8 => { + let ids = self + .owned + .as_any() + .downcast_ref::() + .expect("Expected StringArray for DataType::Utf8"); + if ids.is_null(row_idx) { + None + } else { + Some(ids.value(row_idx)) + } } - } - DataType::LargeUtf8 => { - let ids = self - .owned - .as_any() - .downcast_ref::() - .expect("Expected LargeStringArray for DataType::LargeUtf8"); - if ids.is_null(row_idx) { - None - } else { - Some(ids.value(row_idx)) + DataType::LargeUtf8 => { + let ids = self + .owned + .as_any() + .downcast_ref::() + .expect("Expected LargeStringArray for DataType::LargeUtf8"); + if ids.is_null(row_idx) { + None + } else { + Some(ids.value(row_idx)) + } } - } - DataType::Utf8View => { - let ids = self - .owned - .as_any() - .downcast_ref::() - .expect("Expected StringViewArray for DataType::Utf8View"); - if ids.is_null(row_idx) { - None - } else { - Some(ids.value(row_idx)) + DataType::Utf8View => { + let ids = self + .owned + .as_any() + .downcast_ref::() + .expect("Expected StringViewArray for DataType::Utf8View"); + if ids.is_null(row_idx) { + None + } else { + Some(ids.value(row_idx)) + } } + _ => panic!("Unsupported data type"), + }; + + let hash = self.rnd.hash_one(id); + if let Some(map_idx) = self + .map + .find(hash, |mi| id == mi.as_ref().map(|id| id.as_str())) + { + return (map_idx, false); } - _ => panic!("Unsupported data type"), - }; - - let hash = self.rnd.hash_one(id); - if let Some(map_idx) = self - .map - .find(hash, |mi| id == mi.as_ref().map(|id| id.as_str())) - { - return (map_idx, false); - } - // we're full and this is a better value, so remove the worst - let heap_idx = self.map.remove_if_full(replace_idx); + // we're full and this is a better value, so remove the worst + let heap_idx = self.map.remove_if_full(replace_idx); - // add the new group - let id = id.map(|id| id.to_string()); - let map_idx = self.map.insert(hash, id, heap_idx, mapper); - (map_idx, true) + // add the new group + let id = id.map(|id| id.to_string()); + let map_idx = self.map.insert(hash, id, heap_idx, mapper); + (map_idx, true) + } } } @@ -246,25 +252,29 @@ where } unsafe fn update_heap_idx(&mut self, mapper: &[(usize, usize)]) { - self.map.update_heap_idx(mapper); + unsafe { + self.map.update_heap_idx(mapper); + } } unsafe fn heap_idx_at(&self, map_idx: usize) -> usize { - self.map.heap_idx_at(map_idx) + unsafe { self.map.heap_idx_at(map_idx) } } unsafe fn take_all(&mut self, indexes: Vec) -> ArrayRef { - let ids = self.map.take_all(indexes); - let mut builder: PrimitiveBuilder = - PrimitiveArray::builder(ids.len()).with_data_type(self.kt.clone()); - for id in ids.into_iter() { - match id { - None => builder.append_null(), - Some(id) => builder.append_value(id), + unsafe { + let ids = self.map.take_all(indexes); + let mut builder: PrimitiveBuilder = + PrimitiveArray::builder(ids.len()).with_data_type(self.kt.clone()); + for id in ids.into_iter() { + match id { + None => builder.append_null(), + Some(id) => builder.append_value(id), + } } + let ids = builder.finish(); + Arc::new(ids) } - let ids = builder.finish(); - Arc::new(ids) } unsafe fn find_or_insert( @@ -273,24 +283,26 @@ where replace_idx: usize, mapper: &mut Vec<(usize, usize)>, ) -> (usize, bool) { - let ids = self.owned.as_primitive::(); - let id: Option = if ids.is_null(row_idx) { - None - } else { - Some(ids.value(row_idx)) - }; + unsafe { + let ids = self.owned.as_primitive::(); + let id: Option = if ids.is_null(row_idx) { + None + } else { + Some(ids.value(row_idx)) + }; - let hash: u64 = id.hash(&self.rnd); - if let Some(map_idx) = self.map.find(hash, |mi| id == *mi) { - return (map_idx, false); - } + let hash: u64 = id.hash(&self.rnd); + if let Some(map_idx) = self.map.find(hash, |mi| id == *mi) { + return (map_idx, false); + } - // we're full and this is a better value, so remove the worst - let heap_idx = self.map.remove_if_full(replace_idx); + // we're full and this is a better value, so remove the worst + let heap_idx = self.map.remove_if_full(replace_idx); - // add the new group - let map_idx = self.map.insert(hash, id, heap_idx, mapper); - (map_idx, true) + // add the new group + let map_idx = self.map.insert(hash, id, heap_idx, mapper); + (map_idx, true) + } } } @@ -312,22 +324,28 @@ impl TopKHashTable { } pub unsafe fn heap_idx_at(&self, map_idx: usize) -> usize { - let bucket = unsafe { self.map.bucket(map_idx) }; - bucket.as_ref().heap_idx + unsafe { + let bucket = self.map.bucket(map_idx); + bucket.as_ref().heap_idx + } } pub unsafe fn remove_if_full(&mut self, replace_idx: usize) -> usize { - if self.map.len() >= self.limit { - self.map.erase(self.map.bucket(replace_idx)); - 0 // if full, always replace top node - } else { - self.map.len() // if we're not full, always append to end + unsafe { + if self.map.len() >= self.limit { + self.map.erase(self.map.bucket(replace_idx)); + 0 // if full, always replace top node + } else { + self.map.len() // if we're not full, always append to end + } } } unsafe fn update_heap_idx(&mut self, mapper: &[(usize, usize)]) { - for (m, h) in mapper { - self.map.bucket(*m).as_mut().heap_idx = *h + unsafe { + for (m, h) in mapper { + self.map.bucket(*m).as_mut().heap_idx = *h + } } } @@ -368,12 +386,14 @@ impl TopKHashTable { } pub unsafe fn take_all(&mut self, idxs: Vec) -> Vec { - let ids = idxs - .into_iter() - .map(|idx| self.map.bucket(idx).as_ref().id.clone()) - .collect(); - self.map.clear(); - ids + unsafe { + let ids = idxs + .into_iter() + .map(|idx| self.map.bucket(idx).as_ref().id.clone()) + .collect(); + self.map.clear(); + ids + } } } diff --git a/datafusion/physical-plan/src/aggregates/topk/heap.rs b/datafusion/physical-plan/src/aggregates/topk/heap.rs index 83d76a919e4f..0e6bc18e2d4a 100644 --- a/datafusion/physical-plan/src/aggregates/topk/heap.rs +++ b/datafusion/physical-plan/src/aggregates/topk/heap.rs @@ -17,15 +17,15 @@ //! A custom binary heap implementation for performant top K aggregation +use arrow::array::{ArrayRef, ArrowPrimitiveType, PrimitiveArray, downcast_primitive}; use arrow::array::{ cast::AsArray, types::{IntervalDayTime, IntervalMonthDayNano}, }; -use arrow::array::{downcast_primitive, ArrayRef, ArrowPrimitiveType, PrimitiveArray}; use arrow::buffer::ScalarBuffer; -use arrow::datatypes::{i256, DataType}; -use datafusion_common::exec_datafusion_err; +use arrow::datatypes::{DataType, i256}; use datafusion_common::Result; +use datafusion_common::exec_datafusion_err; use half::f16; use std::cmp::Ordering; @@ -311,13 +311,12 @@ impl TopKHeap { let mut best_idx = node_idx; let mut best_val = &entry.val; for child_idx in left_child..=left_child + 1 { - if let Some(Some(child)) = self.heap.get(child_idx) { - if (!desc && child.val.comp(best_val) == Ordering::Greater) - || (desc && child.val.comp(best_val) == Ordering::Less) - { - best_val = &child.val; - best_idx = child_idx; - } + if let Some(Some(child)) = self.heap.get(child_idx) + && ((!desc && child.val.comp(best_val) == Ordering::Greater) + || (desc && child.val.comp(best_val) == Ordering::Less)) + { + best_val = &child.val; + best_idx = child_idx; } } if best_val.comp(&entry.val) != Ordering::Equal { @@ -329,11 +328,7 @@ impl TopKHeap { fn _tree_print(&self, idx: usize, prefix: &str, is_tail: bool, output: &mut String) { if let Some(Some(hi)) = self.heap.get(idx) { let connector = if idx != 0 { - if is_tail { - "└── " - } else { - "├── " - } + if is_tail { "└── " } else { "├── " } } else { "" }; diff --git a/datafusion/physical-plan/src/aggregates/topk/priority_map.rs b/datafusion/physical-plan/src/aggregates/topk/priority_map.rs index a09d70f7471f..afed265345a5 100644 --- a/datafusion/physical-plan/src/aggregates/topk/priority_map.rs +++ b/datafusion/physical-plan/src/aggregates/topk/priority_map.rs @@ -17,8 +17,8 @@ //! A `Map` / `PriorityQueue` combo that evicts the worst values after reaching `capacity` -use crate::aggregates::topk::hash_table::{new_hash_table, ArrowHashTable}; -use crate::aggregates::topk::heap::{new_heap, ArrowHeap}; +use crate::aggregates::topk::hash_table::{ArrowHashTable, new_hash_table}; +use crate::aggregates::topk::heap::{ArrowHeap, new_heap}; use arrow::array::ArrayRef; use arrow::datatypes::DataType; use datafusion_common::Result; diff --git a/datafusion/physical-plan/src/aggregates/topk_stream.rs b/datafusion/physical-plan/src/aggregates/topk_stream.rs index c706b48e348e..1096eb64d3ae 100644 --- a/datafusion/physical-plan/src/aggregates/topk_stream.rs +++ b/datafusion/physical-plan/src/aggregates/topk_stream.rs @@ -20,20 +20,20 @@ use crate::aggregates::group_values::GroupByMetrics; use crate::aggregates::topk::priority_map::PriorityMap; use crate::aggregates::{ - aggregate_expressions, evaluate_group_by, evaluate_many, AggregateExec, - PhysicalGroupBy, + AggregateExec, PhysicalGroupBy, aggregate_expressions, evaluate_group_by, + evaluate_many, }; use crate::metrics::BaselineMetrics; use crate::{RecordBatchStream, SendableRecordBatchStream}; use arrow::array::{Array, ArrayRef, RecordBatch}; use arrow::datatypes::SchemaRef; use arrow::util::pretty::print_batches; -use datafusion_common::internal_datafusion_err; use datafusion_common::Result; +use datafusion_common::internal_datafusion_err; use datafusion_execution::TaskContext; use datafusion_physical_expr::PhysicalExpr; use futures::stream::{Stream, StreamExt}; -use log::{trace, Level}; +use log::{Level, trace}; use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; diff --git a/datafusion/physical-plan/src/analyze.rs b/datafusion/physical-plan/src/analyze.rs index 01f997f23d6a..1fb8f93a3878 100644 --- a/datafusion/physical-plan/src/analyze.rs +++ b/datafusion/physical-plan/src/analyze.rs @@ -31,7 +31,7 @@ use crate::{DisplayFormatType, ExecutionPlan, Partitioning}; use arrow::{array::StringBuilder, datatypes::SchemaRef, record_batch::RecordBatch}; use datafusion_common::instant::Instant; -use datafusion_common::{assert_eq_or_internal_err, DataFusionError, Result}; +use datafusion_common::{DataFusionError, Result, assert_eq_or_internal_err}; use datafusion_execution::TaskContext; use datafusion_physical_expr::EquivalenceProperties; @@ -278,7 +278,7 @@ mod tests { collect, test::{ assert_is_pending, - exec::{assert_strong_count_converges_to_zero, BlockingExec}, + exec::{BlockingExec, assert_strong_count_converges_to_zero}, }, }; diff --git a/datafusion/physical-plan/src/async_func.rs b/datafusion/physical-plan/src/async_func.rs index 57b124a618c6..280995f48004 100644 --- a/datafusion/physical-plan/src/async_func.rs +++ b/datafusion/physical-plan/src/async_func.rs @@ -23,12 +23,12 @@ use crate::{ use arrow::array::RecordBatch; use arrow_schema::{Fields, Schema, SchemaRef}; use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion}; -use datafusion_common::{assert_eq_or_internal_err, Result}; +use datafusion_common::{Result, assert_eq_or_internal_err}; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; +use datafusion_physical_expr::ScalarFunctionExpr; use datafusion_physical_expr::async_scalar_function::AsyncFuncExpr; use datafusion_physical_expr::equivalence::ProjectionMapping; use datafusion_physical_expr::expressions::Column; -use datafusion_physical_expr::ScalarFunctionExpr; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use futures::stream::StreamExt; use log::trace; @@ -262,15 +262,14 @@ impl AsyncMapper { physical_expr.apply(|expr| { if let Some(scalar_func_expr) = expr.as_any().downcast_ref::() + && scalar_func_expr.fun().as_async().is_some() { - if scalar_func_expr.fun().as_async().is_some() { - let next_name = self.next_column_name(); - self.async_exprs.push(Arc::new(AsyncFuncExpr::try_new( - next_name, - Arc::clone(expr), - schema, - )?)); - } + let next_name = self.next_column_name(); + self.async_exprs.push(Arc::new(AsyncFuncExpr::try_new( + next_name, + Arc::clone(expr), + schema, + )?)); } Ok(TreeNodeRecursion::Continue) })?; diff --git a/datafusion/physical-plan/src/coalesce/mod.rs b/datafusion/physical-plan/src/coalesce/mod.rs index d0930b2c0e58..b3947170d9e4 100644 --- a/datafusion/physical-plan/src/coalesce/mod.rs +++ b/datafusion/physical-plan/src/coalesce/mod.rs @@ -18,7 +18,7 @@ use arrow::array::RecordBatch; use arrow::compute::BatchCoalescer; use arrow::datatypes::SchemaRef; -use datafusion_common::{assert_or_internal_err, Result}; +use datafusion_common::{Result, assert_or_internal_err}; /// Concatenate multiple [`RecordBatch`]es and apply a limit /// diff --git a/datafusion/physical-plan/src/coalesce_partitions.rs b/datafusion/physical-plan/src/coalesce_partitions.rs index 64e0315a523d..7f207d7f1e83 100644 --- a/datafusion/physical-plan/src/coalesce_partitions.rs +++ b/datafusion/physical-plan/src/coalesce_partitions.rs @@ -29,11 +29,11 @@ use super::{ }; use crate::execution_plan::{CardinalityEffect, EvaluationType, SchedulingType}; use crate::filter_pushdown::{FilterDescription, FilterPushdownPhase}; -use crate::projection::{make_with_child, ProjectionExec}; +use crate::projection::{ProjectionExec, make_with_child}; use crate::{DisplayFormatType, ExecutionPlan, Partitioning}; use datafusion_common::config::ConfigOptions; -use datafusion_common::{assert_eq_or_internal_err, internal_err, Result}; +use datafusion_common::{Result, assert_eq_or_internal_err, internal_err}; use datafusion_execution::TaskContext; use datafusion_physical_expr::PhysicalExpr; @@ -290,7 +290,7 @@ impl ExecutionPlan for CoalescePartitionsExec { mod tests { use super::*; use crate::test::exec::{ - assert_strong_count_converges_to_zero, BlockingExec, PanicExec, + BlockingExec, PanicExec, assert_strong_count_converges_to_zero, }; use crate::test::{self, assert_is_pending}; use crate::{collect, common}; diff --git a/datafusion/physical-plan/src/common.rs b/datafusion/physical-plan/src/common.rs index e9a8499a7c9a..79ed168a2ea4 100644 --- a/datafusion/physical-plan/src/common.rs +++ b/datafusion/physical-plan/src/common.rs @@ -29,7 +29,7 @@ use arrow::array::Array; use arrow::datatypes::Schema; use arrow::record_batch::RecordBatch; use datafusion_common::stats::Precision; -use datafusion_common::{plan_err, Result}; +use datafusion_common::{Result, plan_err}; use datafusion_execution::memory_pool::MemoryReservation; use futures::{StreamExt, TryStreamExt}; diff --git a/datafusion/physical-plan/src/coop.rs b/datafusion/physical-plan/src/coop.rs index aa5e7b4a8cec..87d0ee8a3ad0 100644 --- a/datafusion/physical-plan/src/coop.rs +++ b/datafusion/physical-plan/src/coop.rs @@ -85,7 +85,7 @@ use crate::{ }; use arrow::record_batch::RecordBatch; use arrow_schema::Schema; -use datafusion_common::{assert_eq_or_internal_err, Result, Statistics}; +use datafusion_common::{Result, Statistics, assert_eq_or_internal_err}; use datafusion_execution::TaskContext; use crate::execution_plan::SchedulingType; @@ -347,7 +347,7 @@ mod tests { use arrow_schema::SchemaRef; - use futures::{stream, StreamExt}; + use futures::{StreamExt, stream}; // This is the hardcoded value Tokio uses const TASK_BUDGET: usize = 128; diff --git a/datafusion/physical-plan/src/display.rs b/datafusion/physical-plan/src/display.rs index 35ca0b65ae29..52c37a106b39 100644 --- a/datafusion/physical-plan/src/display.rs +++ b/datafusion/physical-plan/src/display.rs @@ -31,7 +31,7 @@ use datafusion_physical_expr::LexOrdering; use crate::metrics::MetricType; use crate::render_tree::RenderTree; -use super::{accept, ExecutionPlan, ExecutionPlanVisitor}; +use super::{ExecutionPlan, ExecutionPlanVisitor, accept}; /// Options for controlling how each [`ExecutionPlan`] should format itself #[derive(Debug, Clone, Copy, PartialEq)] @@ -1120,7 +1120,7 @@ mod tests { use std::fmt::Write; use std::sync::Arc; - use datafusion_common::{internal_datafusion_err, Result, Statistics}; + use datafusion_common::{Result, Statistics, internal_datafusion_err}; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use crate::{DisplayAs, ExecutionPlan, PlanProperties}; diff --git a/datafusion/physical-plan/src/empty.rs b/datafusion/physical-plan/src/empty.rs index e072b55ecff4..fcfbcfa3e827 100644 --- a/datafusion/physical-plan/src/empty.rs +++ b/datafusion/physical-plan/src/empty.rs @@ -21,15 +21,15 @@ use std::any::Any; use std::sync::Arc; use crate::memory::MemoryStream; -use crate::{common, DisplayAs, PlanProperties, SendableRecordBatchStream, Statistics}; +use crate::{DisplayAs, PlanProperties, SendableRecordBatchStream, Statistics, common}; use crate::{ - execution_plan::{Boundedness, EmissionType}, DisplayFormatType, ExecutionPlan, Partitioning, + execution_plan::{Boundedness, EmissionType}, }; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; -use datafusion_common::{assert_or_internal_err, Result}; +use datafusion_common::{Result, assert_or_internal_err}; use datafusion_execution::TaskContext; use datafusion_physical_expr::EquivalenceProperties; @@ -134,7 +134,12 @@ impl ExecutionPlan for EmptyExec { partition: usize, context: Arc, ) -> Result { - trace!("Start EmptyExec::execute for partition {} of context session_id {} and task_id {:?}", partition, context.session_id(), context.task_id()); + trace!( + "Start EmptyExec::execute for partition {} of context session_id {} and task_id {:?}", + partition, + context.session_id(), + context.task_id() + ); assert_or_internal_err!( partition < self.partitions, diff --git a/datafusion/physical-plan/src/execution_plan.rs b/datafusion/physical-plan/src/execution_plan.rs index d3043ee93c05..b7967bb7bbc8 100644 --- a/datafusion/physical-plan/src/execution_plan.rs +++ b/datafusion/physical-plan/src/execution_plan.rs @@ -26,12 +26,12 @@ pub use crate::stream::EmptyRecordBatchStream; pub use datafusion_common::hash_utils; pub use datafusion_common::utils::project_schema; -pub use datafusion_common::{internal_err, ColumnStatistics, Statistics}; +pub use datafusion_common::{ColumnStatistics, Statistics, internal_err}; pub use datafusion_execution::{RecordBatchStream, SendableRecordBatchStream}; pub use datafusion_expr::{Accumulator, ColumnarValue}; pub use datafusion_physical_expr::window::WindowExpr; pub use datafusion_physical_expr::{ - expressions, Distribution, Partitioning, PhysicalExpr, + Distribution, Partitioning, PhysicalExpr, expressions, }; use std::any::Any; @@ -48,8 +48,8 @@ use arrow::array::{Array, RecordBatch}; use arrow::datatypes::SchemaRef; use datafusion_common::config::ConfigOptions; use datafusion_common::{ - assert_eq_or_internal_err, assert_or_internal_err, exec_err, Constraints, - DataFusionError, Result, + Constraints, DataFusionError, Result, assert_eq_or_internal_err, + assert_or_internal_err, exec_err, }; use datafusion_common_runtime::JoinSet; use datafusion_execution::TaskContext; @@ -921,7 +921,7 @@ pub(crate) fn boundedness_from_children<'a>( } => { return Boundedness::Unbounded { requires_infinite_memory: true, - } + }; } Boundedness::Unbounded { requires_infinite_memory: false, diff --git a/datafusion/physical-plan/src/explain.rs b/datafusion/physical-plan/src/explain.rs index 4b8491cf14dd..aa3c0afefe8b 100644 --- a/datafusion/physical-plan/src/explain.rs +++ b/datafusion/physical-plan/src/explain.rs @@ -27,7 +27,7 @@ use crate::{DisplayFormatType, ExecutionPlan, Partitioning}; use arrow::{array::StringBuilder, datatypes::SchemaRef, record_batch::RecordBatch}; use datafusion_common::display::StringifiedPlan; -use datafusion_common::{assert_eq_or_internal_err, Result}; +use datafusion_common::{Result, assert_eq_or_internal_err}; use datafusion_execution::TaskContext; use datafusion_physical_expr::EquivalenceProperties; @@ -133,7 +133,12 @@ impl ExecutionPlan for ExplainExec { partition: usize, context: Arc, ) -> Result { - trace!("Start ExplainExec::execute for partition {} of context session_id {} and task_id {:?}", partition, context.session_id(), context.task_id()); + trace!( + "Start ExplainExec::execute for partition {} of context session_id {} and task_id {:?}", + partition, + context.session_id(), + context.task_id() + ); assert_eq_or_internal_err!( partition, 0, @@ -174,7 +179,11 @@ impl ExecutionPlan for ExplainExec { )?; trace!( - "Before returning RecordBatchStream in ExplainExec::execute for partition {} of context session_id {} and task_id {:?}", partition, context.session_id(), context.task_id()); + "Before returning RecordBatchStream in ExplainExec::execute for partition {} of context session_id {} and task_id {:?}", + partition, + context.session_id(), + context.task_id() + ); Ok(Box::pin(RecordBatchStreamAdapter::new( Arc::clone(&self.schema), diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index addabb50360c..7802d890b982 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -18,7 +18,7 @@ use std::any::Any; use std::pin::Pin; use std::sync::Arc; -use std::task::{ready, Context, Poll}; +use std::task::{Context, Poll, ready}; use itertools::Itertools; @@ -36,12 +36,12 @@ use crate::filter_pushdown::{ }; use crate::metrics::{MetricBuilder, MetricType}; use crate::projection::{ - make_with_child, try_embed_projection, update_expr, EmbeddedProjection, - ProjectionExec, ProjectionExpr, + EmbeddedProjection, ProjectionExec, ProjectionExpr, make_with_child, + try_embed_projection, update_expr, }; use crate::{ - metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet, RatioMetrics}, DisplayFormatType, ExecutionPlan, + metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet, RatioMetrics}, }; use arrow::compute::filter_record_batch; @@ -51,17 +51,17 @@ use datafusion_common::cast::as_boolean_array; use datafusion_common::config::ConfigOptions; use datafusion_common::stats::Precision; use datafusion_common::{ - internal_err, plan_err, project_schema, DataFusionError, Result, ScalarValue, + DataFusionError, Result, ScalarValue, internal_err, plan_err, project_schema, }; use datafusion_execution::TaskContext; use datafusion_expr::Operator; use datafusion_physical_expr::equivalence::ProjectionMapping; -use datafusion_physical_expr::expressions::{lit, BinaryExpr, Column}; +use datafusion_physical_expr::expressions::{BinaryExpr, Column, lit}; use datafusion_physical_expr::intervals::utils::check_support; use datafusion_physical_expr::utils::collect_columns; use datafusion_physical_expr::{ - analyze, conjunction, split_conjunction, AcrossPartitions, AnalysisContext, - ConstExpr, ExprBoundaries, PhysicalExpr, + AcrossPartitions, AnalysisContext, ConstExpr, ExprBoundaries, PhysicalExpr, analyze, + conjunction, split_conjunction, }; use datafusion_physical_expr_common::physical_expr::fmt_sql; @@ -252,22 +252,21 @@ impl FilterExec { let conjunctions = split_conjunction(predicate); for conjunction in conjunctions { - if let Some(binary) = conjunction.as_any().downcast_ref::() { - if binary.op() == &Operator::Eq { - // Filter evaluates to single value for all partitions - if input_eqs.is_expr_constant(binary.left()).is_some() { - let across = input_eqs - .is_expr_constant(binary.right()) - .unwrap_or_default(); - res_constants - .push(ConstExpr::new(Arc::clone(binary.right()), across)); - } else if input_eqs.is_expr_constant(binary.right()).is_some() { - let across = input_eqs - .is_expr_constant(binary.left()) - .unwrap_or_default(); - res_constants - .push(ConstExpr::new(Arc::clone(binary.left()), across)); - } + if let Some(binary) = conjunction.as_any().downcast_ref::() + && binary.op() == &Operator::Eq + { + // Filter evaluates to single value for all partitions + if input_eqs.is_expr_constant(binary.left()).is_some() { + let across = input_eqs + .is_expr_constant(binary.right()) + .unwrap_or_default(); + res_constants + .push(ConstExpr::new(Arc::clone(binary.right()), across)); + } else if input_eqs.is_expr_constant(binary.right()).is_some() { + let across = input_eqs + .is_expr_constant(binary.left()) + .unwrap_or_default(); + res_constants.push(ConstExpr::new(Arc::clone(binary.left()), across)); } } } @@ -415,7 +414,12 @@ impl ExecutionPlan for FilterExec { partition: usize, context: Arc, ) -> Result { - trace!("Start FilterExec::execute for partition {} of context session_id {} and task_id {:?}", partition, context.session_id(), context.task_id()); + trace!( + "Start FilterExec::execute for partition {} of context session_id {} and task_id {:?}", + partition, + context.session_id(), + context.task_id() + ); let metrics = FilterExecMetrics::new(&self.metrics, partition); Ok(Box::pin(FilterExecStream { schema: self.schema(), diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index b90243fdb6c1..cc1b06111a8f 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -21,20 +21,20 @@ use std::{any::Any, sync::Arc, task::Poll}; use super::utils::{ - adjust_right_output_partitioning, reorder_output_after_swap, BatchSplitter, - BatchTransformer, BuildProbeJoinMetrics, NoopBatchTransformer, OnceAsync, OnceFut, - StatefulStreamResult, + BatchSplitter, BatchTransformer, BuildProbeJoinMetrics, NoopBatchTransformer, + OnceAsync, OnceFut, StatefulStreamResult, adjust_right_output_partitioning, + reorder_output_after_swap, }; -use crate::execution_plan::{boundedness_from_children, EmissionType}; +use crate::execution_plan::{EmissionType, boundedness_from_children}; use crate::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use crate::projection::{ - join_allows_pushdown, join_table_borders, new_join_children, - physical_to_column_exprs, ProjectionExec, + ProjectionExec, join_allows_pushdown, join_table_borders, new_join_children, + physical_to_column_exprs, }; use crate::{ - handle_state, ColumnStatistics, DisplayAs, DisplayFormatType, Distribution, - ExecutionPlan, ExecutionPlanProperties, PlanProperties, RecordBatchStream, - SendableRecordBatchStream, Statistics, + ColumnStatistics, DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, + ExecutionPlanProperties, PlanProperties, RecordBatchStream, + SendableRecordBatchStream, Statistics, handle_state, }; use arrow::array::{RecordBatch, RecordBatchOptions}; @@ -42,14 +42,14 @@ use arrow::compute::concat_batches; use arrow::datatypes::{Fields, Schema, SchemaRef}; use datafusion_common::stats::Precision; use datafusion_common::{ - assert_eq_or_internal_err, internal_err, JoinType, Result, ScalarValue, + JoinType, Result, ScalarValue, assert_eq_or_internal_err, internal_err, }; -use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_execution::TaskContext; +use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_physical_expr::equivalence::join_equivalence_properties; use async_trait::async_trait; -use futures::{ready, Stream, StreamExt, TryStreamExt}; +use futures::{Stream, StreamExt, TryStreamExt, ready}; /// Data of the left side that is buffered into memory #[derive(Debug)] diff --git a/datafusion/physical-plan/src/joins/hash_join/exec.rs b/datafusion/physical-plan/src/joins/hash_join/exec.rs index 425aac9031cc..26447847631f 100644 --- a/datafusion/physical-plan/src/joins/hash_join/exec.rs +++ b/datafusion/physical-plan/src/joins/hash_join/exec.rs @@ -21,7 +21,8 @@ use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::{Arc, OnceLock}; use std::{any::Any, vec}; -use crate::execution_plan::{boundedness_from_children, EmissionType}; +use crate::ExecutionPlanProperties; +use crate::execution_plan::{EmissionType, boundedness_from_children}; use crate::filter_pushdown::{ ChildPushdownResult, FilterDescription, FilterPushdownPhase, FilterPushdownPropagation, @@ -35,27 +36,26 @@ use crate::joins::hash_join::stream::{ }; use crate::joins::join_hash_map::{JoinHashMapU32, JoinHashMapU64}; use crate::joins::utils::{ - asymmetric_join_output_partitioning, reorder_output_after_swap, swap_join_projection, - update_hash, OnceAsync, OnceFut, + OnceAsync, OnceFut, asymmetric_join_output_partitioning, reorder_output_after_swap, + swap_join_projection, update_hash, }; use crate::joins::{JoinOn, JoinOnRef, PartitionMode, SharedBitmapBuilder}; use crate::projection::{ - try_embed_projection, try_pushdown_through_join, EmbeddedProjection, JoinData, - ProjectionExec, + EmbeddedProjection, JoinData, ProjectionExec, try_embed_projection, + try_pushdown_through_join, }; use crate::repartition::REPARTITION_RANDOM_STATE; use crate::spill::get_record_batch_memory_size; -use crate::ExecutionPlanProperties; use crate::{ + DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, + PlanProperties, SendableRecordBatchStream, Statistics, common::can_project, joins::utils::{ + BuildProbeJoinMetrics, ColumnIndex, JoinFilter, JoinHashMapType, build_join_schema, check_join_is_valid, estimate_join_statistics, need_produce_result_in_final, symmetric_join_output_partitioning, - BuildProbeJoinMetrics, ColumnIndex, JoinFilter, JoinHashMapType, }, metrics::{ExecutionPlanMetricsSet, MetricsSet}, - DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, - PlanProperties, SendableRecordBatchStream, Statistics, }; use arrow::array::{ArrayRef, BooleanBufferBuilder}; @@ -67,17 +67,17 @@ use arrow_schema::DataType; use datafusion_common::config::ConfigOptions; use datafusion_common::utils::memory::estimate_memory_size; use datafusion_common::{ - assert_or_internal_err, plan_err, project_schema, JoinSide, JoinType, NullEquality, - Result, + JoinSide, JoinType, NullEquality, Result, assert_or_internal_err, plan_err, + project_schema, }; -use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_execution::TaskContext; +use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_expr::Accumulator; use datafusion_functions_aggregate_common::min_max::{MaxAccumulator, MinAccumulator}; use datafusion_physical_expr::equivalence::{ - join_equivalence_properties, ProjectionMapping, + ProjectionMapping, join_equivalence_properties, }; -use datafusion_physical_expr::expressions::{lit, DynamicFilterPhysicalExpr}; +use datafusion_physical_expr::expressions::{DynamicFilterPhysicalExpr, lit}; use datafusion_physical_expr::{PhysicalExpr, PhysicalExprRef}; use ahash::RandomState; @@ -1186,7 +1186,7 @@ impl ExecutionPlan for HashJoinExec { let mut result = FilterPushdownPropagation::if_any(child_pushdown_result.clone()); assert_eq!(child_pushdown_result.self_filters.len(), 2); // Should always be 2, we have 2 children let right_child_self_filters = &child_pushdown_result.self_filters[1]; // We only push down filters to the right child - // We expect 0 or 1 self filters + // We expect 0 or 1 self filters if let Some(filter) = right_child_self_filters.first() { // Note that we don't check PushdDownPredicate::discrimnant because even if nothing said // "yes, I can fully evaluate this filter" things might still use it for statistics -> it's worth updating @@ -1542,7 +1542,7 @@ mod tests { use super::*; use crate::coalesce_partitions::CoalescePartitionsExec; use crate::joins::hash_join::stream::lookup_join_hashmap; - use crate::test::{assert_join_metrics, TestMemoryExec}; + use crate::test::{TestMemoryExec, assert_join_metrics}; use crate::{ common, expressions::Column, repartition::RepartitionExec, test::build_table_i32, test::exec::MockExec, @@ -1555,14 +1555,14 @@ mod tests { use datafusion_common::hash_utils::create_hashes; use datafusion_common::test_util::{batches_to_sort_string, batches_to_string}; use datafusion_common::{ - assert_batches_eq, assert_batches_sorted_eq, assert_contains, exec_err, - internal_err, ScalarValue, + ScalarValue, assert_batches_eq, assert_batches_sorted_eq, assert_contains, + exec_err, internal_err, }; use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion_expr::Operator; - use datafusion_physical_expr::expressions::{BinaryExpr, Literal}; use datafusion_physical_expr::PhysicalExpr; + use datafusion_physical_expr::expressions::{BinaryExpr, Literal}; use hashbrown::HashTable; use insta::{allow_duplicates, assert_snapshot}; use rstest::*; @@ -1691,7 +1691,7 @@ mod tests { Partitioning::Hash(left_expr, partition_count), )?), PartitionMode::Auto => { - return internal_err!("Unexpected PartitionMode::Auto in join tests") + return internal_err!("Unexpected PartitionMode::Auto in join tests"); } }; @@ -1712,7 +1712,7 @@ mod tests { Partitioning::Hash(right_expr, partition_count), )?), PartitionMode::Auto => { - return internal_err!("Unexpected PartitionMode::Auto in join tests") + return internal_err!("Unexpected PartitionMode::Auto in join tests"); } }; @@ -4442,7 +4442,6 @@ mod tests { assert_contains!( err.to_string(), "Resources exhausted: Additional allocation failed for HashJoinInput[1] with top memory consumers (across reservations) as:\n HashJoinInput[1]" - ); assert_contains!( diff --git a/datafusion/physical-plan/src/joins/hash_join/partitioned_hash_eval.rs b/datafusion/physical-plan/src/joins/hash_join/partitioned_hash_eval.rs index 9b0ae2ab47a4..ffceb6b659aa 100644 --- a/datafusion/physical-plan/src/joins/hash_join/partitioned_hash_eval.rs +++ b/datafusion/physical-plan/src/joins/hash_join/partitioned_hash_eval.rs @@ -26,7 +26,7 @@ use arrow::{ datatypes::{DataType, Schema}, util::bit_util, }; -use datafusion_common::{internal_datafusion_err, internal_err, Result}; +use datafusion_common::{Result, internal_datafusion_err, internal_err}; use datafusion_expr::ColumnarValue; use datafusion_physical_expr_common::physical_expr::{ DynHash, PhysicalExpr, PhysicalExprRef, diff --git a/datafusion/physical-plan/src/joins/hash_join/shared_bounds.rs b/datafusion/physical-plan/src/joins/hash_join/shared_bounds.rs index a77dd1407597..5aa2bbb57df4 100644 --- a/datafusion/physical-plan/src/joins/hash_join/shared_bounds.rs +++ b/datafusion/physical-plan/src/joins/hash_join/shared_bounds.rs @@ -21,13 +21,13 @@ use std::fmt; use std::sync::Arc; +use crate::ExecutionPlan; +use crate::ExecutionPlanProperties; +use crate::joins::PartitionMode; use crate::joins::hash_join::exec::HASH_JOIN_SEED; use crate::joins::hash_join::inlist_builder::build_struct_fields; use crate::joins::hash_join::partitioned_hash_eval::{HashExpr, HashTableLookupExpr}; use crate::joins::utils::JoinHashMapType; -use crate::joins::PartitionMode; -use crate::ExecutionPlan; -use crate::ExecutionPlanProperties; use ahash::RandomState; use arrow::array::ArrayRef; @@ -37,7 +37,7 @@ use datafusion_common::{Result, ScalarValue}; use datafusion_expr::Operator; use datafusion_functions::core::r#struct as struct_func; use datafusion_physical_expr::expressions::{ - lit, BinaryExpr, CaseExpr, DynamicFilterPhysicalExpr, InListExpr, + BinaryExpr, CaseExpr, DynamicFilterPhysicalExpr, InListExpr, lit, }; use datafusion_physical_expr::{PhysicalExpr, PhysicalExprRef, ScalarFunctionExpr}; @@ -322,17 +322,24 @@ impl SharedBuildAccumulator { left_child.output_partitioning().partition_count() } // Default value, will be resolved during optimization (does not exist once `execute()` is called; will be replaced by one of the other two) - PartitionMode::Auto => unreachable!("PartitionMode::Auto should not be present at execution time. This is a bug in DataFusion, please report it!"), + PartitionMode::Auto => unreachable!( + "PartitionMode::Auto should not be present at execution time. This is a bug in DataFusion, please report it!" + ), }; let mode_data = match partition_mode { PartitionMode::Partitioned => AccumulatedBuildData::Partitioned { - partitions: vec![None; left_child.output_partitioning().partition_count()], - }, - PartitionMode::CollectLeft => AccumulatedBuildData::CollectLeft { - data: None, + partitions: vec![ + None; + left_child.output_partitioning().partition_count() + ], }, - PartitionMode::Auto => unreachable!("PartitionMode::Auto should not be present at execution time. This is a bug in DataFusion, please report it!"), + PartitionMode::CollectLeft => { + AccumulatedBuildData::CollectLeft { data: None } + } + PartitionMode::Auto => unreachable!( + "PartitionMode::Auto should not be present at execution time. This is a bug in DataFusion, please report it!" + ), }; Self { diff --git a/datafusion/physical-plan/src/joins/hash_join/stream.rs b/datafusion/physical-plan/src/joins/hash_join/stream.rs index 4f70bd6b12e1..e6735675125b 100644 --- a/datafusion/physical-plan/src/joins/hash_join/stream.rs +++ b/datafusion/physical-plan/src/joins/hash_join/stream.rs @@ -23,25 +23,24 @@ use std::sync::Arc; use std::task::Poll; +use crate::joins::PartitionMode; use crate::joins::hash_join::exec::JoinLeftData; use crate::joins::hash_join::shared_bounds::{ PartitionBounds, PartitionBuildData, SharedBuildAccumulator, }; use crate::joins::utils::{ - equal_rows_arr, get_final_indices_from_shared_bitmap, OnceFut, + OnceFut, equal_rows_arr, get_final_indices_from_shared_bitmap, }; -use crate::joins::PartitionMode; use crate::{ - handle_state, + RecordBatchStream, SendableRecordBatchStream, handle_state, hash_utils::create_hashes, joins::join_hash_map::JoinHashMapOffset, joins::utils::{ - adjust_indices_by_join_type, apply_join_filter_to_indices, + BuildProbeJoinMetrics, ColumnIndex, JoinFilter, JoinHashMapType, + StatefulStreamResult, adjust_indices_by_join_type, apply_join_filter_to_indices, build_batch_empty_build_side, build_batch_from_indices, - need_produce_result_in_final, BuildProbeJoinMetrics, ColumnIndex, JoinFilter, - JoinHashMapType, StatefulStreamResult, + need_produce_result_in_final, }, - RecordBatchStream, SendableRecordBatchStream, }; use arrow::array::{Array, ArrayRef, UInt32Array, UInt64Array}; @@ -49,13 +48,13 @@ use arrow::compute::BatchCoalescer; use arrow::datatypes::{Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use datafusion_common::{ - internal_datafusion_err, internal_err, JoinSide, JoinType, NullEquality, Result, + JoinSide, JoinType, NullEquality, Result, internal_datafusion_err, internal_err, }; use datafusion_physical_expr::PhysicalExprRef; use ahash::RandomState; use datafusion_physical_expr_common::utils::evaluate_expressions_to_arrays; -use futures::{ready, Stream, StreamExt}; +use futures::{Stream, StreamExt, ready}; /// Represents build-side of hash join. pub(super) enum BuildSide { @@ -476,11 +475,12 @@ impl HashJoinStream { ) -> Poll>>> { let build_timer = self.join_metrics.build_time.timer(); // build hash table from left (build) side, if not yet done - let left_data = ready!(self - .build_side - .try_as_initial_mut()? - .left_fut - .get_shared(cx))?; + let left_data = ready!( + self.build_side + .try_as_initial_mut()? + .left_fut + .get_shared(cx) + )?; build_timer.done(); // Handle dynamic filter build-side information accumulation @@ -494,7 +494,9 @@ impl HashJoinStream { let left_side_partition_id = match self.mode { PartitionMode::Partitioned => self.partition, PartitionMode::CollectLeft => 0, - PartitionMode::Auto => unreachable!("PartitionMode::Auto should not be present at execution time. This is a bug in DataFusion, please report it!"), + PartitionMode::Auto => unreachable!( + "PartitionMode::Auto should not be present at execution time. This is a bug in DataFusion, please report it!" + ), }; // Determine pushdown strategy based on availability of InList values diff --git a/datafusion/physical-plan/src/joins/join_hash_map.rs b/datafusion/physical-plan/src/joins/join_hash_map.rs index 0e3150631059..ed370fdb16cf 100644 --- a/datafusion/physical-plan/src/joins/join_hash_map.rs +++ b/datafusion/physical-plan/src/joins/join_hash_map.rs @@ -23,8 +23,8 @@ use std::fmt::{self, Debug}; use std::ops::Sub; use arrow::datatypes::ArrowNativeType; -use hashbrown::hash_table::Entry::{Occupied, Vacant}; use hashbrown::HashTable; +use hashbrown::hash_table::Entry::{Occupied, Vacant}; /// Maps a `u64` hash value based on the build side ["on" values] to a list of indices with this key's value. /// diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index e61e457247df..c11ee0f37894 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -20,8 +20,8 @@ use std::any::Any; use std::fmt::Formatter; use std::ops::{BitOr, ControlFlow}; -use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; +use std::sync::atomic::{AtomicUsize, Ordering}; use std::task::Poll; use super::utils::{ @@ -29,19 +29,19 @@ use super::utils::{ reorder_output_after_swap, swap_join_projection, }; use crate::common::can_project; -use crate::execution_plan::{boundedness_from_children, EmissionType}; +use crate::execution_plan::{EmissionType, boundedness_from_children}; +use crate::joins::SharedBitmapBuilder; use crate::joins::utils::{ + BuildProbeJoinMetrics, ColumnIndex, JoinFilter, OnceAsync, OnceFut, build_join_schema, check_join_is_valid, estimate_join_statistics, - need_produce_right_in_final, BuildProbeJoinMetrics, ColumnIndex, JoinFilter, - OnceAsync, OnceFut, + need_produce_right_in_final, }; -use crate::joins::SharedBitmapBuilder; use crate::metrics::{ Count, ExecutionPlanMetricsSet, MetricBuilder, MetricType, MetricsSet, RatioMetrics, }; use crate::projection::{ - try_embed_projection, try_pushdown_through_join, EmbeddedProjection, JoinData, - ProjectionExec, + EmbeddedProjection, JoinData, ProjectionExec, try_embed_projection, + try_pushdown_through_join, }; use crate::{ DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, ExecutionPlanProperties, @@ -49,26 +49,26 @@ use crate::{ }; use arrow::array::{ - new_null_array, Array, BooleanArray, BooleanBufferBuilder, RecordBatchOptions, - UInt32Array, UInt64Array, + Array, BooleanArray, BooleanBufferBuilder, RecordBatchOptions, UInt32Array, + UInt64Array, new_null_array, }; use arrow::buffer::BooleanBuffer; use arrow::compute::{ - concat_batches, filter, filter_record_batch, not, take, BatchCoalescer, + BatchCoalescer, concat_batches, filter, filter_record_batch, not, take, }; use arrow::datatypes::{Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use arrow_schema::DataType; use datafusion_common::cast::as_boolean_array; use datafusion_common::{ - arrow_err, assert_eq_or_internal_err, internal_datafusion_err, internal_err, - project_schema, unwrap_or_internal_err, JoinSide, Result, ScalarValue, Statistics, + JoinSide, Result, ScalarValue, Statistics, arrow_err, assert_eq_or_internal_err, + internal_datafusion_err, internal_err, project_schema, unwrap_or_internal_err, }; -use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_execution::TaskContext; +use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_expr::JoinType; use datafusion_physical_expr::equivalence::{ - join_equivalence_properties, ProjectionMapping, + ProjectionMapping, join_equivalence_properties, }; use futures::{Stream, StreamExt, TryStreamExt}; @@ -932,7 +932,7 @@ impl Stream for NestedLoopJoinStream { match self.handle_probe_right() { ControlFlow::Continue(()) => continue, ControlFlow::Break(poll) => { - return self.metrics.join_metrics.baseline.record_poll(poll) + return self.metrics.join_metrics.baseline.record_poll(poll); } } } @@ -953,7 +953,7 @@ impl Stream for NestedLoopJoinStream { match self.handle_emit_right_unmatched() { ControlFlow::Continue(()) => continue, ControlFlow::Break(poll) => { - return self.metrics.join_metrics.baseline.record_poll(poll) + return self.metrics.join_metrics.baseline.record_poll(poll); } } } @@ -983,7 +983,7 @@ impl Stream for NestedLoopJoinStream { match self.handle_emit_left_unmatched() { ControlFlow::Continue(()) => continue, ControlFlow::Break(poll) => { - return self.metrics.join_metrics.baseline.record_poll(poll) + return self.metrics.join_metrics.baseline.record_poll(poll); } } } @@ -1293,7 +1293,10 @@ impl NestedLoopJoinStream { left_data.batch().num_rows() - self.left_probe_idx, ); - debug_assert!(l_row_count != 0, "This function should only be entered when there are remaining left rows to process"); + debug_assert!( + l_row_count != 0, + "This function should only be entered when there are remaining left rows to process" + ); let joined_batch = self.process_left_range_join( &left_data, &right_batch, @@ -1433,17 +1436,17 @@ impl NestedLoopJoinStream { let l_index = l_start_index + i / right_rows; let r_index = i % right_rows; - if let Some(bitmap) = left_bitmap.as_mut() { - if is_matched { - // Map local index back to absolute left index within the batch - bitmap.set_bit(l_index, true); - } + if let Some(bitmap) = left_bitmap.as_mut() + && is_matched + { + // Map local index back to absolute left index within the batch + bitmap.set_bit(l_index, true); } - if let Some(bitmap) = local_right_bitmap.as_mut() { - if is_matched { - bitmap.set_bit(r_index, true); - } + if let Some(bitmap) = local_right_bitmap.as_mut() + && is_matched + { + bitmap.set_bit(r_index, true); } } @@ -1711,14 +1714,14 @@ impl NestedLoopJoinStream { /// Flush the `output_buffer` if there are batches ready to output /// None if no result batch ready. fn maybe_flush_ready_batch(&mut self) -> Option>>> { - if self.output_buffer.has_completed_batch() { - if let Some(batch) = self.output_buffer.next_completed_batch() { - // Update output rows for selectivity metric - let output_rows = batch.num_rows(); - self.metrics.selectivity.add_part(output_rows); + if self.output_buffer.has_completed_batch() + && let Some(batch) = self.output_buffer.next_completed_batch() + { + // Update output rows for selectivity metric + let output_rows = batch.num_rows(); + self.metrics.selectivity.add_part(output_rows); - return Some(Poll::Ready(Some(Ok(batch)))); - } + return Some(Poll::Ready(Some(Ok(batch)))); } None @@ -2099,9 +2102,7 @@ fn build_unmatched_batch( another_side_schema .fields() .iter() - .map(|field| { - (**field).clone().with_nullable(true) - }) + .map(|field| (**field).clone().with_nullable(true)) .collect::>(), )); let left_null_batch = if nullable_left_schema.fields.is_empty() { @@ -2115,10 +2116,20 @@ fn build_unmatched_batch( debug_assert_ne!(batch_side, JoinSide::None); let opposite_side = batch_side.negate(); - build_row_join_batch(output_schema, &left_null_batch, 0, batch, Some(flipped_bitmap), col_indices, opposite_side) - - }, - JoinType::RightSemi | JoinType::RightAnti | JoinType::LeftSemi | JoinType::LeftAnti => { + build_row_join_batch( + output_schema, + &left_null_batch, + 0, + batch, + Some(flipped_bitmap), + col_indices, + opposite_side, + ) + } + JoinType::RightSemi + | JoinType::RightAnti + | JoinType::LeftSemi + | JoinType::LeftAnti => { if matches!(join_type, JoinType::RightSemi | JoinType::RightAnti) { debug_assert_eq!(batch_side, JoinSide::Right); } @@ -2126,7 +2137,8 @@ fn build_unmatched_batch( debug_assert_eq!(batch_side, JoinSide::Left); } - let bitmap = if matches!(join_type, JoinType::LeftSemi | JoinType::RightSemi) { + let bitmap = if matches!(join_type, JoinType::LeftSemi | JoinType::RightSemi) + { batch_bitmap.clone() } else { not(&batch_bitmap)? @@ -2148,8 +2160,11 @@ fn build_unmatched_batch( columns.push(filtered_col); } - Ok(Some(RecordBatch::try_new(Arc::clone(output_schema), columns)?)) - }, + Ok(Some(RecordBatch::try_new( + Arc::clone(output_schema), + columns, + )?)) + } JoinType::RightMark | JoinType::LeftMark => { if join_type == JoinType::RightMark { debug_assert_eq!(batch_side, JoinSide::Right); @@ -2172,24 +2187,33 @@ fn build_unmatched_batch( } else if column_index.side == JoinSide::None { let right_batch_bitmap = std::mem::take(&mut right_batch_bitmap_opt); match right_batch_bitmap { - Some(right_batch_bitmap) => {columns.push(Arc::new(right_batch_bitmap))}, + Some(right_batch_bitmap) => { + columns.push(Arc::new(right_batch_bitmap)) + } None => unreachable!("Should only be one mark column"), } } else { - return internal_err!("Not possible to have this join side for RightMark join"); + return internal_err!( + "Not possible to have this join side for RightMark join" + ); } } - Ok(Some(RecordBatch::try_new(Arc::clone(output_schema), columns)?)) + Ok(Some(RecordBatch::try_new( + Arc::clone(output_schema), + columns, + )?)) } - _ => internal_err!("If batch is at right side, this function must be handling Full/Right/RightSemi/RightAnti/RightMark joins"), + _ => internal_err!( + "If batch is at right side, this function must be handling Full/Right/RightSemi/RightAnti/RightMark joins" + ), } } #[cfg(test)] pub(crate) mod tests { use super::*; - use crate::test::{assert_join_metrics, TestMemoryExec}; + use crate::test::{TestMemoryExec, assert_join_metrics}; use crate::{ common, expressions::Column, repartition::RepartitionExec, test::build_table_i32, }; @@ -2197,7 +2221,7 @@ pub(crate) mod tests { use arrow::compute::SortOptions; use arrow::datatypes::{DataType, Field}; use datafusion_common::test_util::batches_to_sort_string; - use datafusion_common::{assert_contains, ScalarValue}; + use datafusion_common::{ScalarValue, assert_contains}; use datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion_expr::Operator; use datafusion_physical_expr::expressions::{BinaryExpr, Literal}; diff --git a/datafusion/physical-plan/src/joins/piecewise_merge_join/classic_join.rs b/datafusion/physical-plan/src/joins/piecewise_merge_join/classic_join.rs index c74f4d984e13..7673fc084c46 100644 --- a/datafusion/physical-plan/src/joins/piecewise_merge_join/classic_join.rs +++ b/datafusion/physical-plan/src/joins/piecewise_merge_join/classic_join.rs @@ -17,8 +17,8 @@ //! Stream Implementation for PiecewiseMergeJoin's Classic Join (Left, Right, Full, Inner) -use arrow::array::{new_null_array, Array, PrimitiveBuilder}; -use arrow::compute::{take, BatchCoalescer}; +use arrow::array::{Array, PrimitiveBuilder, new_null_array}; +use arrow::compute::{BatchCoalescer, take}; use arrow::datatypes::UInt32Type; use arrow::{ array::{ArrayRef, RecordBatch, UInt32Array}, @@ -26,7 +26,7 @@ use arrow::{ }; use arrow_schema::{Schema, SchemaRef, SortOptions}; use datafusion_common::NullEquality; -use datafusion_common::{internal_err, Result}; +use datafusion_common::{Result, internal_err}; use datafusion_execution::{RecordBatchStream, SendableRecordBatchStream}; use datafusion_expr::{JoinType, Operator}; use datafusion_physical_expr::PhysicalExprRef; @@ -37,8 +37,8 @@ use std::{sync::Arc, task::Poll}; use crate::handle_state; use crate::joins::piecewise_merge_join::exec::{BufferedSide, BufferedSideReadyState}; use crate::joins::piecewise_merge_join::utils::need_produce_result_in_final; -use crate::joins::utils::{compare_join_arrays, get_final_indices_from_shared_bitmap}; use crate::joins::utils::{BuildProbeJoinMetrics, StatefulStreamResult}; +use crate::joins::utils::{compare_join_arrays, get_final_indices_from_shared_bitmap}; pub(super) enum PiecewiseMergeJoinStreamState { WaitBufferedSide, @@ -188,11 +188,12 @@ impl ClassicPWMJStream { cx: &mut std::task::Context<'_>, ) -> Poll>>> { let build_timer = self.join_metrics.build_time.timer(); - let buffered_data = ready!(self - .buffered_side - .try_as_initial_mut()? - .buffered_fut - .get_shared(cx))?; + let buffered_data = ready!( + self.buffered_side + .try_as_initial_mut()? + .buffered_fut + .get_shared(cx) + )?; build_timer.done(); // We will start fetching stream batches for classic joins @@ -548,7 +549,7 @@ fn resolve_classic_join( return internal_err!( "PiecewiseMergeJoin should not contain operator, {}", operator - ) + ); } }; @@ -653,17 +654,16 @@ fn create_unmatched_batch( mod tests { use super::*; use crate::{ - common, + ExecutionPlan, common, joins::PiecewiseMergeJoinExec, - test::{build_table_i32, TestMemoryExec}, - ExecutionPlan, + test::{TestMemoryExec, build_table_i32}, }; use arrow::array::{Date32Array, Date64Array}; use arrow_schema::{DataType, Field}; use datafusion_common::test_util::batches_to_string; use datafusion_execution::TaskContext; use datafusion_expr::JoinType; - use datafusion_physical_expr::{expressions::Column, PhysicalExpr}; + use datafusion_physical_expr::{PhysicalExpr, expressions::Column}; use insta::assert_snapshot; use std::sync::Arc; diff --git a/datafusion/physical-plan/src/joins/piecewise_merge_join/exec.rs b/datafusion/physical-plan/src/joins/piecewise_merge_join/exec.rs index 06a1df438655..c821473c67dc 100644 --- a/datafusion/physical-plan/src/joins/piecewise_merge_join/exec.rs +++ b/datafusion/physical-plan/src/joins/piecewise_merge_join/exec.rs @@ -23,10 +23,10 @@ use arrow::{ }; use arrow_schema::{SchemaRef, SortOptions}; use datafusion_common::not_impl_err; -use datafusion_common::{internal_err, JoinSide, Result}; +use datafusion_common::{JoinSide, Result, internal_err}; use datafusion_execution::{ - memory_pool::{MemoryConsumer, MemoryReservation}, SendableRecordBatchStream, + memory_pool::{MemoryConsumer, MemoryReservation}, }; use datafusion_expr::{JoinType, Operator}; use datafusion_physical_expr::equivalence::join_equivalence_properties; @@ -38,10 +38,10 @@ use datafusion_physical_expr_common::physical_expr::fmt_sql; use futures::TryStreamExt; use parking_lot::Mutex; use std::fmt::Formatter; -use std::sync::atomic::AtomicUsize; use std::sync::Arc; +use std::sync::atomic::AtomicUsize; -use crate::execution_plan::{boundedness_from_children, EmissionType}; +use crate::execution_plan::{EmissionType, boundedness_from_children}; use crate::joins::piecewise_merge_join::classic_join::{ ClassicPWMJStream, PiecewiseMergeJoinStreamState, @@ -50,16 +50,16 @@ use crate::joins::piecewise_merge_join::utils::{ build_visited_indices_map, is_existence_join, is_right_existence_join, }; use crate::joins::utils::asymmetric_join_output_partitioning; +use crate::{DisplayAs, DisplayFormatType, ExecutionPlanProperties}; use crate::{ + ExecutionPlan, PlanProperties, joins::{ - utils::{build_join_schema, BuildProbeJoinMetrics, OnceAsync, OnceFut}, SharedBitmapBuilder, + utils::{BuildProbeJoinMetrics, OnceAsync, OnceFut, build_join_schema}, }, metrics::ExecutionPlanMetricsSet, spill::get_record_batch_memory_size, - ExecutionPlan, PlanProperties, }; -use crate::{DisplayAs, DisplayFormatType, ExecutionPlanProperties}; /// `PiecewiseMergeJoinExec` is a join execution plan that only evaluates single range filter and show much /// better performance for these workloads than `NestedLoopJoin` @@ -321,7 +321,7 @@ impl PiecewiseMergeJoinExec { _ => { return internal_err!( "Cannot contain non-range operator in PiecewiseMergeJoinExec" - ) + ); } }; diff --git a/datafusion/physical-plan/src/joins/sort_merge_join/exec.rs b/datafusion/physical-plan/src/joins/sort_merge_join/exec.rs index b5b4325798f9..5362259d22ea 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join/exec.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join/exec.rs @@ -23,19 +23,19 @@ use std::any::Any; use std::fmt::Formatter; use std::sync::Arc; -use crate::execution_plan::{boundedness_from_children, EmissionType}; +use crate::execution_plan::{EmissionType, boundedness_from_children}; use crate::expressions::PhysicalSortExpr; use crate::joins::sort_merge_join::metrics::SortMergeJoinMetrics; use crate::joins::sort_merge_join::stream::SortMergeJoinStream; use crate::joins::utils::{ - build_join_schema, check_join_is_valid, estimate_join_statistics, - reorder_output_after_swap, symmetric_join_output_partitioning, JoinFilter, JoinOn, - JoinOnRef, + JoinFilter, JoinOn, JoinOnRef, build_join_schema, check_join_is_valid, + estimate_join_statistics, reorder_output_after_swap, + symmetric_join_output_partitioning, }; use crate::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use crate::projection::{ - join_allows_pushdown, join_table_borders, new_join_children, - physical_to_column_exprs, update_join_on, ProjectionExec, + ProjectionExec, join_allows_pushdown, join_table_borders, new_join_children, + physical_to_column_exprs, update_join_on, }; use crate::{ DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, ExecutionPlanProperties, @@ -45,13 +45,13 @@ use crate::{ use arrow::compute::SortOptions; use arrow::datatypes::SchemaRef; use datafusion_common::{ - assert_eq_or_internal_err, internal_err, plan_err, JoinSide, JoinType, NullEquality, - Result, + JoinSide, JoinType, NullEquality, Result, assert_eq_or_internal_err, internal_err, + plan_err, }; -use datafusion_execution::memory_pool::MemoryConsumer; use datafusion_execution::TaskContext; +use datafusion_execution::memory_pool::MemoryConsumer; use datafusion_physical_expr::equivalence::join_equivalence_properties; -use datafusion_physical_expr_common::physical_expr::{fmt_sql, PhysicalExprRef}; +use datafusion_physical_expr_common::physical_expr::{PhysicalExprRef, fmt_sql}; use datafusion_physical_expr_common::sort_expr::{LexOrdering, OrderingRequirements}; /// Join execution plan that executes equi-join predicates on multiple partitions using Sort-Merge diff --git a/datafusion/physical-plan/src/joins/sort_merge_join/stream.rs b/datafusion/physical-plan/src/joins/sort_merge_join/stream.rs index 4119a54cd539..b36992caf4b4 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join/stream.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join/stream.rs @@ -28,29 +28,29 @@ use std::io::BufReader; use std::mem::size_of; use std::ops::Range; use std::pin::Pin; +use std::sync::Arc; use std::sync::atomic::AtomicUsize; use std::sync::atomic::Ordering::Relaxed; -use std::sync::Arc; use std::task::{Context, Poll}; use crate::joins::sort_merge_join::metrics::SortMergeJoinMetrics; -use crate::joins::utils::{compare_join_arrays, JoinFilter}; +use crate::joins::utils::{JoinFilter, compare_join_arrays}; use crate::metrics::RecordOutput; use crate::spill::spill_manager::SpillManager; use crate::{PhysicalExpr, RecordBatchStream, SendableRecordBatchStream}; use arrow::array::{types::UInt64Type, *}; use arrow::compute::{ - self, concat_batches, filter_record_batch, is_not_null, take, BatchCoalescer, - SortOptions, + self, BatchCoalescer, SortOptions, concat_batches, filter_record_batch, is_not_null, + take, }; use arrow::datatypes::{DataType, SchemaRef, TimeUnit}; use arrow::error::ArrowError; use arrow::ipc::reader::StreamReader; use datafusion_common::config::SpillCompression; use datafusion_common::{ - exec_err, internal_err, not_impl_err, DataFusionError, HashSet, JoinSide, JoinType, - NullEquality, Result, + DataFusionError, HashSet, JoinSide, JoinType, NullEquality, Result, exec_err, + internal_err, not_impl_err, }; use datafusion_execution::disk_manager::RefCountedTempFile; use datafusion_execution::memory_pool::MemoryReservation; diff --git a/datafusion/physical-plan/src/joins/sort_merge_join/tests.rs b/datafusion/physical-plan/src/joins/sort_merge_join/tests.rs index 47a85b9b5c6e..46dfaac058aa 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join/tests.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join/tests.rs @@ -27,39 +27,39 @@ use std::sync::Arc; use arrow::array::{ - builder::{BooleanBuilder, UInt64Builder}, BinaryArray, BooleanArray, Date32Array, Date64Array, FixedSizeBinaryArray, Int32Array, RecordBatch, UInt64Array, + builder::{BooleanBuilder, UInt64Builder}, }; -use arrow::compute::{filter_record_batch, BatchCoalescer, SortOptions}; +use arrow::compute::{BatchCoalescer, SortOptions, filter_record_batch}; use arrow::datatypes::{DataType, Field, Schema}; use datafusion_common::JoinType::*; use datafusion_common::{ - assert_batches_eq, assert_contains, JoinType, NullEquality, Result, + JoinSide, + test_util::{batches_to_sort_string, batches_to_string}, }; use datafusion_common::{ - test_util::{batches_to_sort_string, batches_to_string}, - JoinSide, + JoinType, NullEquality, Result, assert_batches_eq, assert_contains, }; +use datafusion_execution::TaskContext; use datafusion_execution::config::SessionConfig; use datafusion_execution::disk_manager::{DiskManagerBuilder, DiskManagerMode}; use datafusion_execution::runtime_env::RuntimeEnvBuilder; -use datafusion_execution::TaskContext; use datafusion_expr::Operator; use datafusion_physical_expr::expressions::BinaryExpr; use insta::{allow_duplicates, assert_snapshot}; use crate::{ expressions::Column, - joins::sort_merge_join::stream::{get_corrected_filter_mask, JoinedRecordBatches}, + joins::sort_merge_join::stream::{JoinedRecordBatches, get_corrected_filter_mask}, }; -use crate::joins::utils::{ColumnIndex, JoinFilter, JoinOn}; use crate::joins::SortMergeJoinExec; +use crate::joins::utils::{ColumnIndex, JoinFilter, JoinOn}; use crate::test::TestMemoryExec; use crate::test::{build_table_i32, build_table_i32_two_cols}; -use crate::{common, ExecutionPlan}; +use crate::{ExecutionPlan, common}; fn build_table( a: (&str, &Vec), diff --git a/datafusion/physical-plan/src/joins/stream_join_utils.rs b/datafusion/physical-plan/src/joins/stream_join_utils.rs index e56b9e781377..10f212bb4a20 100644 --- a/datafusion/physical-plan/src/joins/stream_join_utils.rs +++ b/datafusion/physical-plan/src/joins/stream_join_utils.rs @@ -23,12 +23,12 @@ use std::mem::size_of; use std::sync::Arc; use crate::joins::join_hash_map::{ - get_matched_indices, get_matched_indices_with_limit_offset, update_from_iter, - JoinHashMapOffset, + JoinHashMapOffset, get_matched_indices, get_matched_indices_with_limit_offset, + update_from_iter, }; use crate::joins::utils::{JoinFilter, JoinHashMapType}; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricBuilder}; -use crate::{metrics, ExecutionPlan}; +use crate::{ExecutionPlan, metrics}; use arrow::array::{ ArrowPrimitiveType, BooleanBufferBuilder, NativeAdapter, PrimitiveArray, RecordBatch, @@ -37,7 +37,7 @@ use arrow::compute::concat_batches; use arrow::datatypes::{ArrowNativeType, Schema, SchemaRef}; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::utils::memory::estimate_memory_size; -use datafusion_common::{arrow_datafusion_err, HashSet, JoinSide, Result, ScalarValue}; +use datafusion_common::{HashSet, JoinSide, Result, ScalarValue, arrow_datafusion_err}; use datafusion_expr::interval_arithmetic::Interval; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::intervals::cp_solver::ExprIntervalGraph; @@ -1020,46 +1020,54 @@ pub mod tests { let left_schema = Arc::new(left_schema); let right_schema = Arc::new(right_schema); - assert!(build_filter_input_order( - JoinSide::Left, - &filter, - &left_schema, - &PhysicalSortExpr { - expr: col("la1", left_schema.as_ref())?, - options: SortOptions::default(), - } - )? - .is_some()); - assert!(build_filter_input_order( - JoinSide::Left, - &filter, - &left_schema, - &PhysicalSortExpr { - expr: col("lt1", left_schema.as_ref())?, - options: SortOptions::default(), - } - )? - .is_none()); - assert!(build_filter_input_order( - JoinSide::Right, - &filter, - &right_schema, - &PhysicalSortExpr { - expr: col("ra1", right_schema.as_ref())?, - options: SortOptions::default(), - } - )? - .is_some()); - assert!(build_filter_input_order( - JoinSide::Right, - &filter, - &right_schema, - &PhysicalSortExpr { - expr: col("rb1", right_schema.as_ref())?, - options: SortOptions::default(), - } - )? - .is_none()); + assert!( + build_filter_input_order( + JoinSide::Left, + &filter, + &left_schema, + &PhysicalSortExpr { + expr: col("la1", left_schema.as_ref())?, + options: SortOptions::default(), + } + )? + .is_some() + ); + assert!( + build_filter_input_order( + JoinSide::Left, + &filter, + &left_schema, + &PhysicalSortExpr { + expr: col("lt1", left_schema.as_ref())?, + options: SortOptions::default(), + } + )? + .is_none() + ); + assert!( + build_filter_input_order( + JoinSide::Right, + &filter, + &right_schema, + &PhysicalSortExpr { + expr: col("ra1", right_schema.as_ref())?, + options: SortOptions::default(), + } + )? + .is_some() + ); + assert!( + build_filter_input_order( + JoinSide::Right, + &filter, + &right_schema, + &PhysicalSortExpr { + expr: col("rb1", right_schema.as_ref())?, + options: SortOptions::default(), + } + )? + .is_none() + ); Ok(()) } diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index 2b8d2bd40e5e..1f6bc703a030 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -35,26 +35,26 @@ use std::vec; use crate::common::SharedMemoryReservation; use crate::execution_plan::{boundedness_from_children, emission_type_from_children}; use crate::joins::stream_join_utils::{ + PruningJoinHashMap, SortedFilterExpr, StreamJoinMetrics, calculate_filter_expr_intervals, combine_two_batches, convert_sort_expr_with_filter_schema, get_pruning_anti_indices, get_pruning_semi_indices, prepare_sorted_exprs, record_visited_indices, - PruningJoinHashMap, SortedFilterExpr, StreamJoinMetrics, }; use crate::joins::utils::{ - apply_join_filter_to_indices, build_batch_from_indices, build_join_schema, - check_join_is_valid, equal_rows_arr, symmetric_join_output_partitioning, update_hash, BatchSplitter, BatchTransformer, ColumnIndex, JoinFilter, JoinHashMapType, JoinOn, - JoinOnRef, NoopBatchTransformer, StatefulStreamResult, + JoinOnRef, NoopBatchTransformer, StatefulStreamResult, apply_join_filter_to_indices, + build_batch_from_indices, build_join_schema, check_join_is_valid, equal_rows_arr, + symmetric_join_output_partitioning, update_hash, }; use crate::projection::{ - join_allows_pushdown, join_table_borders, new_join_children, - physical_to_column_exprs, update_join_filter, update_join_on, ProjectionExec, + ProjectionExec, join_allows_pushdown, join_table_borders, new_join_children, + physical_to_column_exprs, update_join_filter, update_join_on, }; use crate::{ - joins::StreamJoinPartitionMode, - metrics::{ExecutionPlanMetricsSet, MetricsSet}, DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, ExecutionPlanProperties, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, + joins::StreamJoinPartitionMode, + metrics::{ExecutionPlanMetricsSet, MetricsSet}, }; use arrow::array::{ @@ -67,20 +67,20 @@ use arrow::record_batch::RecordBatch; use datafusion_common::hash_utils::create_hashes; use datafusion_common::utils::bisect; use datafusion_common::{ - assert_eq_or_internal_err, plan_err, HashSet, JoinSide, JoinType, NullEquality, - Result, + HashSet, JoinSide, JoinType, NullEquality, Result, assert_eq_or_internal_err, + plan_err, }; -use datafusion_execution::memory_pool::MemoryConsumer; use datafusion_execution::TaskContext; +use datafusion_execution::memory_pool::MemoryConsumer; use datafusion_expr::interval_arithmetic::Interval; use datafusion_physical_expr::equivalence::join_equivalence_properties; use datafusion_physical_expr::intervals::cp_solver::ExprIntervalGraph; -use datafusion_physical_expr_common::physical_expr::{fmt_sql, PhysicalExprRef}; +use datafusion_physical_expr_common::physical_expr::{PhysicalExprRef, fmt_sql}; use datafusion_physical_expr_common::sort_expr::{LexOrdering, OrderingRequirements}; use ahash::RandomState; use datafusion_physical_expr_common::utils::evaluate_expressions_to_arrays; -use futures::{ready, Stream, StreamExt}; +use futures::{Stream, StreamExt, ready}; use parking_lot::Mutex; const HASHMAP_SHRINK_SCALE_FACTOR: usize = 4; @@ -1769,7 +1769,7 @@ mod tests { use datafusion_common::ScalarValue; use datafusion_execution::config::SessionConfig; use datafusion_expr::Operator; - use datafusion_physical_expr::expressions::{binary, col, lit, Column}; + use datafusion_physical_expr::expressions::{Column, binary, col, lit}; use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; use rstest::*; diff --git a/datafusion/physical-plan/src/joins/test_utils.rs b/datafusion/physical-plan/src/joins/test_utils.rs index 58338bd86021..27284bf546bc 100644 --- a/datafusion/physical-plan/src/joins/test_utils.rs +++ b/datafusion/physical-plan/src/joins/test_utils.rs @@ -25,11 +25,11 @@ use crate::joins::{ }; use crate::repartition::RepartitionExec; use crate::test::TestMemoryExec; -use crate::{common, ExecutionPlan, ExecutionPlanProperties, Partitioning}; +use crate::{ExecutionPlan, ExecutionPlanProperties, Partitioning, common}; use arrow::array::{ - types::IntervalDayTime, ArrayRef, Float64Array, Int32Array, IntervalDayTimeArray, - RecordBatch, TimestampMillisecondArray, + ArrayRef, Float64Array, Int32Array, IntervalDayTimeArray, RecordBatch, + TimestampMillisecondArray, types::IntervalDayTime, }; use arrow::datatypes::{DataType, Schema}; use arrow::util::pretty::pretty_format_batches; diff --git a/datafusion/physical-plan/src/joins/utils.rs b/datafusion/physical-plan/src/joins/utils.rs index 561c9c022f60..ea1e569e2855 100644 --- a/datafusion/physical-plan/src/joins/utils.rs +++ b/datafusion/physical-plan/src/joins/utils.rs @@ -17,7 +17,7 @@ //! Join related functionality used both on logical and physical plans -use std::cmp::{min, Ordering}; +use std::cmp::{Ordering, min}; use std::collections::HashSet; use std::fmt::{self, Debug}; use std::future::Future; @@ -41,20 +41,20 @@ pub use crate::joins::{JoinOn, JoinOnRef}; use ahash::RandomState; use arrow::array::{ - builder::UInt64Builder, downcast_array, new_null_array, Array, ArrowPrimitiveType, - BooleanBufferBuilder, NativeAdapter, PrimitiveArray, RecordBatch, RecordBatchOptions, - UInt32Array, UInt32Builder, UInt64Array, + Array, ArrowPrimitiveType, BooleanBufferBuilder, NativeAdapter, PrimitiveArray, + RecordBatch, RecordBatchOptions, UInt32Array, UInt32Builder, UInt64Array, + builder::UInt64Builder, downcast_array, new_null_array, }; use arrow::array::{ ArrayRef, BinaryArray, BinaryViewArray, BooleanArray, Date32Array, Date64Array, - Decimal128Array, FixedSizeBinaryArray, Float32Array, Float64Array, Int16Array, - Int32Array, Int64Array, Int8Array, LargeBinaryArray, LargeStringArray, StringArray, + Decimal128Array, FixedSizeBinaryArray, Float32Array, Float64Array, Int8Array, + Int16Array, Int32Array, Int64Array, LargeBinaryArray, LargeStringArray, StringArray, StringViewArray, TimestampMicrosecondArray, TimestampMillisecondArray, - TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt8Array, + TimestampNanosecondArray, TimestampSecondArray, UInt8Array, UInt16Array, }; use arrow::buffer::{BooleanBuffer, NullBuffer}; use arrow::compute::kernels::cmp::eq; -use arrow::compute::{self, and, take, FilterBuilder}; +use arrow::compute::{self, FilterBuilder, and, take}; use arrow::datatypes::{ ArrowNativeType, Field, Schema, SchemaBuilder, UInt32Type, UInt64Type, }; @@ -64,22 +64,22 @@ use datafusion_common::cast::as_boolean_array; use datafusion_common::hash_utils::create_hashes; use datafusion_common::stats::Precision; use datafusion_common::{ - not_impl_err, plan_err, DataFusionError, JoinSide, JoinType, NullEquality, Result, - SharedResult, + DataFusionError, JoinSide, JoinType, NullEquality, Result, SharedResult, + not_impl_err, plan_err, }; -use datafusion_expr::interval_arithmetic::Interval; use datafusion_expr::Operator; +use datafusion_expr::interval_arithmetic::Interval; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::utils::collect_columns; use datafusion_physical_expr::{ - add_offset_to_expr, add_offset_to_physical_sort_exprs, LexOrdering, PhysicalExpr, - PhysicalExprRef, + LexOrdering, PhysicalExpr, PhysicalExprRef, add_offset_to_expr, + add_offset_to_physical_sort_exprs, }; use datafusion_physical_expr_common::datum::compare_op_for_nested; use datafusion_physical_expr_common::utils::evaluate_expressions_to_arrays; use futures::future::{BoxFuture, Shared}; -use futures::{ready, FutureExt}; +use futures::{FutureExt, ready}; use parking_lot::Mutex; /// Checks whether the schemas "left" and "right" and columns "on" represent a valid join. @@ -159,20 +159,21 @@ pub fn calculate_join_output_ordering( match maintains_input_order { [true, false] => { // Special case, we can prefix ordering of right side with the ordering of left side. - if join_type == JoinType::Inner && probe_side == Some(JoinSide::Left) { - if let Some(right_ordering) = right_ordering.cloned() { - let right_offset = add_offset_to_physical_sort_exprs( - right_ordering, - left_columns_len as _, - )?; - return if let Some(left_ordering) = left_ordering { - let mut result = left_ordering.clone(); - result.extend(right_offset); - Ok(Some(result)) - } else { - Ok(LexOrdering::new(right_offset)) - }; - } + if join_type == JoinType::Inner + && probe_side == Some(JoinSide::Left) + && let Some(right_ordering) = right_ordering.cloned() + { + let right_offset = add_offset_to_physical_sort_exprs( + right_ordering, + left_columns_len as _, + )?; + return if let Some(left_ordering) = left_ordering { + let mut result = left_ordering.clone(); + result.extend(right_offset); + Ok(Some(result)) + } else { + Ok(LexOrdering::new(right_offset)) + }; } Ok(left_ordering.cloned()) } @@ -704,27 +705,25 @@ fn max_distinct_count( // Cap the estimate using the number of possible values: if let (Some(min), Some(max)) = (stats.min_value.get_value(), stats.max_value.get_value()) - { - if let Some(range_dc) = Interval::try_new(min.clone(), max.clone()) + && let Some(range_dc) = Interval::try_new(min.clone(), max.clone()) .ok() .and_then(|e| e.cardinality()) + { + let range_dc = range_dc as usize; + // Note that the `unwrap` calls in the below statement are safe. + return if matches!(result, Precision::Absent) + || &range_dc < result.get_value().unwrap() { - let range_dc = range_dc as usize; - // Note that the `unwrap` calls in the below statement are safe. - return if matches!(result, Precision::Absent) - || &range_dc < result.get_value().unwrap() + if stats.min_value.is_exact().unwrap() + && stats.max_value.is_exact().unwrap() { - if stats.min_value.is_exact().unwrap() - && stats.max_value.is_exact().unwrap() - { - Precision::Exact(range_dc) - } else { - Precision::Inexact(range_dc) - } + Precision::Exact(range_dc) } else { - result - }; - } + Precision::Inexact(range_dc) + } + } else { + result + }; } result @@ -1884,7 +1883,7 @@ mod tests { use arrow::datatypes::{DataType, Fields}; use arrow::error::{ArrowError, Result as ArrowResult}; use datafusion_common::stats::Precision::{Absent, Exact, Inexact}; - use datafusion_common::{arrow_datafusion_err, arrow_err, ScalarValue}; + use datafusion_common::{ScalarValue, arrow_datafusion_err, arrow_err}; use datafusion_physical_expr::PhysicalSortExpr; use rstest::rstest; @@ -2679,7 +2678,10 @@ mod tests { &join_on, ).expect("Expected non-empty PartialJoinStatistics for SemiJoin with absent inner num_rows"); - assert_eq!(absent_inner_estimation.num_rows, 500, "Expected outer.num_rows estimated SemiJoin cardinality for absent inner num_rows"); + assert_eq!( + absent_inner_estimation.num_rows, 500, + "Expected outer.num_rows estimated SemiJoin cardinality for absent inner num_rows" + ); let absent_inner_estimation = estimate_join_cardinality( &JoinType::LeftSemi, @@ -2695,7 +2697,10 @@ mod tests { }, &join_on, ); - assert!(absent_inner_estimation.is_none(), "Expected \"None\" estimated SemiJoin cardinality for absent outer and inner num_rows"); + assert!( + absent_inner_estimation.is_none(), + "Expected \"None\" estimated SemiJoin cardinality for absent outer and inner num_rows" + ); Ok(()) } diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs index 107ace95bb8d..849b34e70347 100644 --- a/datafusion/physical-plan/src/lib.rs +++ b/datafusion/physical-plan/src/lib.rs @@ -33,26 +33,26 @@ pub use datafusion_common::hash_utils; pub use datafusion_common::utils::project_schema; -pub use datafusion_common::{internal_err, ColumnStatistics, Statistics}; +pub use datafusion_common::{ColumnStatistics, Statistics, internal_err}; pub use datafusion_execution::{RecordBatchStream, SendableRecordBatchStream}; pub use datafusion_expr::{Accumulator, ColumnarValue}; -pub use datafusion_physical_expr::window::WindowExpr; use datafusion_physical_expr::PhysicalSortExpr; +pub use datafusion_physical_expr::window::WindowExpr; pub use datafusion_physical_expr::{ - expressions, Distribution, Partitioning, PhysicalExpr, + Distribution, Partitioning, PhysicalExpr, expressions, }; pub use crate::display::{DefaultDisplay, DisplayAs, DisplayFormatType, VerboseDisplay}; pub use crate::execution_plan::{ - collect, collect_partitioned, displayable, execute_input_stream, execute_stream, - execute_stream_partitioned, get_plan_string, with_new_children_if_necessary, - ExecutionPlan, ExecutionPlanProperties, PlanProperties, + ExecutionPlan, ExecutionPlanProperties, PlanProperties, collect, collect_partitioned, + displayable, execute_input_stream, execute_stream, execute_stream_partitioned, + get_plan_string, with_new_children_if_necessary, }; pub use crate::metrics::Metric; pub use crate::ordering::InputOrderMode; pub use crate::stream::EmptyRecordBatchStream; pub use crate::topk::TopK; -pub use crate::visitor::{accept, visit_execution_plan, ExecutionPlanVisitor}; +pub use crate::visitor::{ExecutionPlanVisitor, accept, visit_execution_plan}; pub use crate::work_table::WorkTable; pub use spill::spill_manager::SpillManager; diff --git a/datafusion/physical-plan/src/limit.rs b/datafusion/physical-plan/src/limit.rs index 4646e8ebc313..05d688282147 100644 --- a/datafusion/physical-plan/src/limit.rs +++ b/datafusion/physical-plan/src/limit.rs @@ -32,7 +32,7 @@ use crate::{DisplayFormatType, Distribution, ExecutionPlan, Partitioning}; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; -use datafusion_common::{assert_eq_or_internal_err, internal_err, Result}; +use datafusion_common::{Result, assert_eq_or_internal_err, internal_err}; use datafusion_execution::TaskContext; use futures::stream::{Stream, StreamExt}; @@ -320,7 +320,12 @@ impl ExecutionPlan for LocalLimitExec { partition: usize, context: Arc, ) -> Result { - trace!("Start LocalLimitExec::execute for partition {} of context session_id {} and task_id {:?}", partition, context.session_id(), context.task_id()); + trace!( + "Start LocalLimitExec::execute for partition {} of context session_id {} and task_id {:?}", + partition, + context.session_id(), + context.task_id() + ); let baseline_metrics = BaselineMetrics::new(&self.metrics, partition); let stream = self.input.execute(partition, context)?; Ok(Box::pin(LimitStream::new( @@ -494,8 +499,8 @@ mod tests { use arrow::array::RecordBatchOptions; use arrow::datatypes::Schema; use datafusion_common::stats::Precision; - use datafusion_physical_expr::expressions::col; use datafusion_physical_expr::PhysicalExpr; + use datafusion_physical_expr::expressions::col; #[tokio::test] async fn limit() -> Result<()> { diff --git a/datafusion/physical-plan/src/memory.rs b/datafusion/physical-plan/src/memory.rs index 92e789ebc596..65a3fe575e17 100644 --- a/datafusion/physical-plan/src/memory.rs +++ b/datafusion/physical-plan/src/memory.rs @@ -32,9 +32,9 @@ use crate::{ use arrow::array::RecordBatch; use arrow::datatypes::SchemaRef; -use datafusion_common::{assert_eq_or_internal_err, assert_or_internal_err, Result}; -use datafusion_execution::memory_pool::MemoryReservation; +use datafusion_common::{Result, assert_eq_or_internal_err, assert_or_internal_err}; use datafusion_execution::TaskContext; +use datafusion_execution::memory_pool::MemoryReservation; use datafusion_physical_expr::EquivalenceProperties; use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; diff --git a/datafusion/physical-plan/src/metrics/builder.rs b/datafusion/physical-plan/src/metrics/builder.rs index 21f6762e73ed..42ff427a53ed 100644 --- a/datafusion/physical-plan/src/metrics/builder.rs +++ b/datafusion/physical-plan/src/metrics/builder.rs @@ -20,8 +20,8 @@ use std::{borrow::Cow, sync::Arc}; use crate::metrics::{ - value::{PruningMetrics, RatioMergeStrategy, RatioMetrics}, MetricType, + value::{PruningMetrics, RatioMergeStrategy, RatioMetrics}, }; use super::{ diff --git a/datafusion/physical-plan/src/metrics/mod.rs b/datafusion/physical-plan/src/metrics/mod.rs index c39779090bae..2e4a9a100375 100644 --- a/datafusion/physical-plan/src/metrics/mod.rs +++ b/datafusion/physical-plan/src/metrics/mod.rs @@ -742,9 +742,15 @@ mod tests { n.join(", ") } - assert_eq!("end_timestamp, start_timestamp, elapsed_compute, the_second_counter, the_counter, the_third_counter, the_time, output_rows", metric_names(&metrics)); + assert_eq!( + "end_timestamp, start_timestamp, elapsed_compute, the_second_counter, the_counter, the_third_counter, the_time, output_rows", + metric_names(&metrics) + ); let metrics = metrics.sorted_for_display(); - assert_eq!("output_rows, elapsed_compute, the_counter, the_second_counter, the_third_counter, the_time, start_timestamp, end_timestamp", metric_names(&metrics)); + assert_eq!( + "output_rows, elapsed_compute, the_counter, the_second_counter, the_third_counter, the_time, start_timestamp, end_timestamp", + metric_names(&metrics) + ); } } diff --git a/datafusion/physical-plan/src/metrics/value.rs b/datafusion/physical-plan/src/metrics/value.rs index 901494883aa8..8016380be994 100644 --- a/datafusion/physical-plan/src/metrics/value.rs +++ b/datafusion/physical-plan/src/metrics/value.rs @@ -28,8 +28,8 @@ use std::{ borrow::{Borrow, Cow}, fmt::{Debug, Display}, sync::{ - atomic::{AtomicUsize, Ordering}, Arc, + atomic::{AtomicUsize, Ordering}, }, time::Duration, }; @@ -1076,12 +1076,11 @@ mod tests { fn new_custom_counter(name: &'static str, value: usize) -> MetricValue { let custom_counter = CustomCounter::default(); custom_counter.count.fetch_add(value, Ordering::Relaxed); - let custom_val = MetricValue::Custom { + + MetricValue::Custom { name: Cow::Borrowed(name), value: Arc::new(custom_counter), - }; - - custom_val + } } #[test] diff --git a/datafusion/physical-plan/src/placeholder_row.rs b/datafusion/physical-plan/src/placeholder_row.rs index be4c3da509e8..4d00b73cff39 100644 --- a/datafusion/physical-plan/src/placeholder_row.rs +++ b/datafusion/physical-plan/src/placeholder_row.rs @@ -24,13 +24,13 @@ use crate::coop::cooperative; use crate::execution_plan::{Boundedness, EmissionType, SchedulingType}; use crate::memory::MemoryStream; use crate::{ - common, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties, - SendableRecordBatchStream, Statistics, + DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties, + SendableRecordBatchStream, Statistics, common, }; use arrow::array::{ArrayRef, NullArray, RecordBatch, RecordBatchOptions}; use arrow::datatypes::{DataType, Field, Fields, Schema, SchemaRef}; -use datafusion_common::{assert_or_internal_err, Result}; +use datafusion_common::{Result, assert_or_internal_err}; use datafusion_execution::TaskContext; use datafusion_physical_expr::EquivalenceProperties; @@ -152,7 +152,12 @@ impl ExecutionPlan for PlaceholderRowExec { partition: usize, context: Arc, ) -> Result { - trace!("Start PlaceholderRowExec::execute for partition {} of context session_id {} and task_id {:?}", partition, context.session_id(), context.task_id()); + trace!( + "Start PlaceholderRowExec::execute for partition {} of context session_id {} and task_id {:?}", + partition, + context.session_id(), + context.task_id() + ); assert_or_internal_err!( partition < self.partitions, diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs index 022842bbe741..a56e9272f119 100644 --- a/datafusion/physical-plan/src/projection.rs +++ b/datafusion/physical-plan/src/projection.rs @@ -45,17 +45,17 @@ use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{ Transformed, TransformedResult, TreeNode, TreeNodeRecursion, }; -use datafusion_common::{internal_err, JoinSide, Result}; +use datafusion_common::{JoinSide, Result, internal_err}; use datafusion_execution::TaskContext; use datafusion_physical_expr::equivalence::ProjectionMapping; use datafusion_physical_expr::projection::Projector; use datafusion_physical_expr::utils::collect_columns; -use datafusion_physical_expr_common::physical_expr::{fmt_sql, PhysicalExprRef}; +use datafusion_physical_expr_common::physical_expr::{PhysicalExprRef, fmt_sql}; use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexRequirement}; // Re-exported from datafusion-physical-expr for backwards compatibility // We recommend updating your imports to use datafusion-physical-expr directly pub use datafusion_physical_expr::projection::{ - update_expr, ProjectionExpr, ProjectionExprs, + ProjectionExpr, ProjectionExprs, update_expr, }; use futures::stream::{Stream, StreamExt}; @@ -287,7 +287,12 @@ impl ExecutionPlan for ProjectionExec { partition: usize, context: Arc, ) -> Result { - trace!("Start ProjectionExec::execute for partition {} of context session_id {} and task_id {:?}", partition, context.session_id(), context.task_id()); + trace!( + "Start ProjectionExec::execute for partition {} of context session_id {} and task_id {:?}", + partition, + context.session_id(), + context.task_id() + ); Ok(Box::pin(ProjectionStream::new( self.projector.clone(), self.input.execute(partition, context)?, @@ -1001,11 +1006,11 @@ mod tests { use crate::test::exec::StatisticsExec; use arrow::datatypes::{DataType, Field, Schema}; - use datafusion_common::stats::{ColumnStatistics, Precision, Statistics}; use datafusion_common::ScalarValue; + use datafusion_common::stats::{ColumnStatistics, Precision, Statistics}; use datafusion_expr::Operator; - use datafusion_physical_expr::expressions::{col, BinaryExpr, Column, Literal}; + use datafusion_physical_expr::expressions::{BinaryExpr, Column, Literal, col}; #[test] fn test_collect_column_indices() -> Result<()> { diff --git a/datafusion/physical-plan/src/recursive_query.rs b/datafusion/physical-plan/src/recursive_query.rs index e2df8f9578f9..3e7c75b0c8e8 100644 --- a/datafusion/physical-plan/src/recursive_query.rs +++ b/datafusion/physical-plan/src/recursive_query.rs @@ -22,7 +22,7 @@ use std::sync::Arc; use std::task::{Context, Poll}; use super::work_table::{ReservedBatches, WorkTable}; -use crate::aggregates::group_values::{new_group_values, GroupValues}; +use crate::aggregates::group_values::{GroupValues, new_group_values}; use crate::aggregates::order::GroupOrdering; use crate::execution_plan::{Boundedness, EmissionType}; use crate::metrics::{ @@ -37,12 +37,12 @@ use arrow::compute::filter_record_batch; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_common::{internal_datafusion_err, not_impl_err, Result}; -use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; +use datafusion_common::{Result, internal_datafusion_err, not_impl_err}; use datafusion_execution::TaskContext; +use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_physical_expr::{EquivalenceProperties, Partitioning}; -use futures::{ready, Stream, StreamExt}; +use futures::{Stream, StreamExt, ready}; /// Recursive query execution plan. /// diff --git a/datafusion/physical-plan/src/repartition/distributor_channels.rs b/datafusion/physical-plan/src/repartition/distributor_channels.rs index 34294d0f2326..22872d1e32d4 100644 --- a/datafusion/physical-plan/src/repartition/distributor_channels.rs +++ b/datafusion/physical-plan/src/repartition/distributor_channels.rs @@ -43,8 +43,8 @@ use std::{ ops::DerefMut, pin::Pin, sync::{ - atomic::{AtomicUsize, Ordering}, Arc, + atomic::{AtomicUsize, Ordering}, }, task::{Context, Poll, Waker}, }; @@ -476,7 +476,7 @@ type SharedGate = Arc; mod tests { use std::sync::atomic::AtomicBool; - use futures::{task::ArcWake, FutureExt}; + use futures::{FutureExt, task::ArcWake}; use super::*; diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index a84201d1c7c0..9d437dbcf650 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -34,7 +34,7 @@ use crate::coalesce::LimitedBatchCoalescer; use crate::execution_plan::{CardinalityEffect, EvaluationType, SchedulingType}; use crate::hash_utils::create_hashes; use crate::metrics::{BaselineMetrics, SpillMetrics}; -use crate::projection::{all_columns, make_with_child, update_expr, ProjectionExec}; +use crate::projection::{ProjectionExec, all_columns, make_with_child, update_expr}; use crate::sorts::streaming_merge::StreamingMergeBuilder; use crate::spill::spill_manager::SpillManager; use crate::spill::spill_pool::{self, SpillPoolWriter}; @@ -48,12 +48,12 @@ use datafusion_common::config::ConfigOptions; use datafusion_common::stats::Precision; use datafusion_common::utils::transpose; use datafusion_common::{ - assert_or_internal_err, internal_err, ColumnStatistics, DataFusionError, HashMap, + ColumnStatistics, DataFusionError, HashMap, assert_or_internal_err, internal_err, }; -use datafusion_common::{not_impl_err, Result}; +use datafusion_common::{Result, not_impl_err}; use datafusion_common_runtime::SpawnedTask; -use datafusion_execution::memory_pool::MemoryConsumer; use datafusion_execution::TaskContext; +use datafusion_execution::memory_pool::MemoryConsumer; use datafusion_physical_expr::{EquivalenceProperties, PhysicalExpr}; use datafusion_physical_expr_common::sort_expr::LexOrdering; @@ -63,13 +63,13 @@ use crate::filter_pushdown::{ }; use datafusion_physical_expr_common::utils::evaluate_expressions_to_arrays; use futures::stream::Stream; -use futures::{ready, FutureExt, StreamExt, TryStreamExt}; +use futures::{FutureExt, StreamExt, TryStreamExt, ready}; use log::trace; use parking_lot::Mutex; mod distributor_channels; use distributor_channels::{ - channels, partition_aware_channels, DistributionReceiver, DistributionSender, + DistributionReceiver, DistributionSender, channels, partition_aware_channels, }; /// A batch in the repartition queue - either in memory or spilled to disk. @@ -276,7 +276,9 @@ impl RepartitionExecState { let RepartitionExecState::InputStreamsInitialized(value) = self else { // This cannot happen, as ensure_input_streams_initialized() was just called, // but the compiler does not know. - return internal_err!("Programming error: RepartitionExecState must be in the InputStreamsInitialized state after calling RepartitionExecState::ensure_input_streams_initialized"); + return internal_err!( + "Programming error: RepartitionExecState must be in the InputStreamsInitialized state after calling RepartitionExecState::ensure_input_streams_initialized" + ); }; value } @@ -1616,8 +1618,8 @@ mod tests { test::{ assert_is_pending, exec::{ - assert_strong_count_converges_to_zero, BarrierExec, BlockingExec, - ErrorExec, MockExec, + BarrierExec, BlockingExec, ErrorExec, MockExec, + assert_strong_count_converges_to_zero, }, }, {collect, expressions::col}, @@ -2562,8 +2564,8 @@ mod test { #[tokio::test] async fn test_preserve_order_with_spilling() -> Result<()> { - use datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion_execution::TaskContext; + use datafusion_execution::runtime_env::RuntimeEnvBuilder; // Create sorted input data across multiple partitions // Partition1: [1,3], [5,7], [9,11] @@ -2689,8 +2691,8 @@ mod test { #[tokio::test] async fn test_hash_partitioning_with_spilling() -> Result<()> { - use datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion_execution::TaskContext; + use datafusion_execution::runtime_env::RuntimeEnvBuilder; // Create input data similar to the round-robin test let batch1 = record_batch!(("c0", UInt32, [1, 3])).unwrap(); diff --git a/datafusion/physical-plan/src/sorts/cursor.rs b/datafusion/physical-plan/src/sorts/cursor.rs index 54dc2414e4f0..de3ec2e7a91e 100644 --- a/datafusion/physical-plan/src/sorts/cursor.rs +++ b/datafusion/physical-plan/src/sorts/cursor.rs @@ -19,8 +19,8 @@ use std::cmp::Ordering; use std::sync::Arc; use arrow::array::{ - types::ByteArrayType, Array, ArrowPrimitiveType, GenericByteArray, - GenericByteViewArray, OffsetSizeTrait, PrimitiveArray, StringViewArray, + Array, ArrowPrimitiveType, GenericByteArray, GenericByteViewArray, OffsetSizeTrait, + PrimitiveArray, StringViewArray, types::ByteArrayType, }; use arrow::buffer::{Buffer, OffsetBuffer, ScalarBuffer}; use arrow::compute::SortOptions; diff --git a/datafusion/physical-plan/src/sorts/merge.rs b/datafusion/physical-plan/src/sorts/merge.rs index 720a3e53e459..272816251daf 100644 --- a/datafusion/physical-plan/src/sorts/merge.rs +++ b/datafusion/physical-plan/src/sorts/merge.rs @@ -20,13 +20,13 @@ use std::pin::Pin; use std::sync::Arc; -use std::task::{ready, Context, Poll}; +use std::task::{Context, Poll, ready}; +use crate::RecordBatchStream; use crate::metrics::BaselineMetrics; use crate::sorts::builder::BatchBuilder; use crate::sorts::cursor::{Cursor, CursorValues}; use crate::sorts::stream::PartitionedStream; -use crate::RecordBatchStream; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; diff --git a/datafusion/physical-plan/src/sorts/multi_level_merge.rs b/datafusion/physical-plan/src/sorts/multi_level_merge.rs index 784c9f7a6057..3540f1de3ed1 100644 --- a/datafusion/physical-plan/src/sorts/multi_level_merge.rs +++ b/datafusion/physical-plan/src/sorts/multi_level_merge.rs @@ -290,7 +290,11 @@ impl MultiLevelMergeBuilder { // If we're only merging memory streams, we don't need to attach the memory reservation // as it's empty if is_only_merging_memory_streams { - assert_eq!(memory_reservation.size(), 0, "when only merging memory streams, we should not have any memory reservation and let the merge sort handle the memory"); + assert_eq!( + memory_reservation.size(), + 0, + "when only merging memory streams, we should not have any memory reservation and let the merge sort handle the memory" + ); Ok(merge_sort_stream) } else { diff --git a/datafusion/physical-plan/src/sorts/partial_sort.rs b/datafusion/physical-plan/src/sorts/partial_sort.rs index 7a623b0c30d3..c474a1a9ea4c 100644 --- a/datafusion/physical-plan/src/sorts/partial_sort.rs +++ b/datafusion/physical-plan/src/sorts/partial_sort.rs @@ -67,12 +67,12 @@ use crate::{ use arrow::compute::concat_batches; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; -use datafusion_common::utils::evaluate_partition_ranges; use datafusion_common::Result; +use datafusion_common::utils::evaluate_partition_ranges; use datafusion_execution::{RecordBatchStream, TaskContext}; use datafusion_physical_expr::LexOrdering; -use futures::{ready, Stream, StreamExt}; +use futures::{Stream, StreamExt, ready}; use log::trace; /// Partial Sort execution plan. @@ -220,9 +220,17 @@ impl DisplayAs for PartialSortExec { let common_prefix_length = self.common_prefix_length; match self.fetch { Some(fetch) => { - write!(f, "PartialSortExec: TopK(fetch={fetch}), expr=[{}], common_prefix_length=[{common_prefix_length}]", self.expr) + write!( + f, + "PartialSortExec: TopK(fetch={fetch}), expr=[{}], common_prefix_length=[{common_prefix_length}]", + self.expr + ) } - None => write!(f, "PartialSortExec: expr=[{}], common_prefix_length=[{common_prefix_length}]", self.expr), + None => write!( + f, + "PartialSortExec: expr=[{}], common_prefix_length=[{common_prefix_length}]", + self.expr + ), } } DisplayFormatType::TreeRender => match self.fetch { @@ -291,7 +299,12 @@ impl ExecutionPlan for PartialSortExec { partition: usize, context: Arc, ) -> Result { - trace!("Start PartialSortExec::execute for partition {} of context session_id {} and task_id {:?}", partition, context.session_id(), context.task_id()); + trace!( + "Start PartialSortExec::execute for partition {} of context session_id {} and task_id {:?}", + partition, + context.session_id(), + context.task_id() + ); let input = self.input.execute(partition, Arc::clone(&context))?; @@ -484,13 +497,13 @@ mod tests { use itertools::Itertools; use crate::collect; - use crate::expressions::col; use crate::expressions::PhysicalSortExpr; + use crate::expressions::col; use crate::sorts::sort::SortExec; use crate::test; - use crate::test::assert_is_pending; - use crate::test::exec::{assert_strong_count_converges_to_zero, BlockingExec}; use crate::test::TestMemoryExec; + use crate::test::assert_is_pending; + use crate::test::exec::{BlockingExec, assert_strong_count_converges_to_zero}; use super::*; diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index 00cc96dccf28..9c92082d6877 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -37,7 +37,7 @@ use crate::metrics::{ BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet, RecordOutput, SpillMetrics, SplitMetrics, }; -use crate::projection::{make_with_child, update_ordering, ProjectionExec}; +use crate::projection::{ProjectionExec, make_with_child, update_ordering}; use crate::sorts::streaming_merge::{SortedSpillFile, StreamingMergeBuilder}; use crate::spill::get_record_batch_memory_size; use crate::spill::in_progress_spill_file::InProgressSpillFile; @@ -57,15 +57,15 @@ use arrow::compute::{concat_batches, lexsort_to_indices, take_arrays}; use arrow::datatypes::SchemaRef; use datafusion_common::config::SpillCompression; use datafusion_common::{ - assert_or_internal_err, internal_datafusion_err, unwrap_or_internal_err, - DataFusionError, Result, + DataFusionError, Result, assert_or_internal_err, internal_datafusion_err, + unwrap_or_internal_err, }; +use datafusion_execution::TaskContext; use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_execution::runtime_env::RuntimeEnv; -use datafusion_execution::TaskContext; -use datafusion_physical_expr::expressions::{lit, DynamicFilterPhysicalExpr}; use datafusion_physical_expr::LexOrdering; use datafusion_physical_expr::PhysicalExpr; +use datafusion_physical_expr::expressions::{DynamicFilterPhysicalExpr, lit}; use futures::{StreamExt, TryStreamExt}; use log::{debug, trace}; @@ -1087,13 +1087,16 @@ impl DisplayAs for SortExec { let preserve_partitioning = self.preserve_partitioning; match self.fetch { Some(fetch) => { - write!(f, "SortExec: TopK(fetch={fetch}), expr=[{}], preserve_partitioning=[{preserve_partitioning}]", self.expr)?; - if let Some(filter) = &self.filter { - if let Ok(current) = filter.read().expr().current() { - if !current.eq(&lit(true)) { - write!(f, ", filter=[{current}]")?; - } - } + write!( + f, + "SortExec: TopK(fetch={fetch}), expr=[{}], preserve_partitioning=[{preserve_partitioning}]", + self.expr + )?; + if let Some(filter) = &self.filter + && let Ok(current) = filter.read().expr().current() + && !current.eq(&lit(true)) + { + write!(f, ", filter=[{current}]")?; } if !self.common_sort_prefix.is_empty() { write!(f, ", sort_prefix=[")?; @@ -1111,7 +1114,11 @@ impl DisplayAs for SortExec { Ok(()) } } - None => write!(f, "SortExec: expr=[{}], preserve_partitioning=[{preserve_partitioning}]", self.expr), + None => write!( + f, + "SortExec: expr=[{}], preserve_partitioning=[{preserve_partitioning}]", + self.expr + ), } } DisplayFormatType::TreeRender => match self.fetch { @@ -1203,7 +1210,12 @@ impl ExecutionPlan for SortExec { partition: usize, context: Arc, ) -> Result { - trace!("Start SortExec::execute for partition {} of context session_id {} and task_id {:?}", partition, context.session_id(), context.task_id()); + trace!( + "Start SortExec::execute for partition {} of context session_id {} and task_id {:?}", + partition, + context.session_id(), + context.task_id() + ); let mut input = self.input.execute(partition, Arc::clone(&context))?; @@ -1352,10 +1364,10 @@ impl ExecutionPlan for SortExec { let mut child = ChildFilterDescription::from_child(&parent_filters, self.input())?; - if let Some(filter) = &self.filter { - if config.optimizer.enable_topk_dynamic_filter_pushdown { - child = child.with_self_filter(filter.read().expr()); - } + if let Some(filter) = &self.filter + && config.optimizer.enable_topk_dynamic_filter_pushdown + { + child = child.with_self_filter(filter.read().expr()); } Ok(FilterDescription::new().with_child(child)) @@ -1374,8 +1386,8 @@ mod tests { use crate::execution_plan::Boundedness; use crate::expressions::col; use crate::test; - use crate::test::exec::{assert_strong_count_converges_to_zero, BlockingExec}; use crate::test::TestMemoryExec; + use crate::test::exec::{BlockingExec, assert_strong_count_converges_to_zero}; use crate::test::{assert_is_pending, make_partition}; use arrow::array::*; @@ -1384,11 +1396,11 @@ mod tests { use datafusion_common::cast::as_primitive_array; use datafusion_common::test_util::batches_to_string; use datafusion_common::{DataFusionError, Result, ScalarValue}; + use datafusion_execution::RecordBatchStream; use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::RuntimeEnvBuilder; - use datafusion_execution::RecordBatchStream; - use datafusion_physical_expr::expressions::{Column, Literal}; use datafusion_physical_expr::EquivalenceProperties; + use datafusion_physical_expr::expressions::{Column, Literal}; use futures::{FutureExt, Stream}; use insta::assert_snapshot; @@ -2150,8 +2162,8 @@ mod tests { } #[tokio::test] - async fn should_return_stream_with_batches_in_the_requested_size_when_sorting_in_place( - ) -> Result<()> { + async fn should_return_stream_with_batches_in_the_requested_size_when_sorting_in_place() + -> Result<()> { let batch_size = 100; let create_task_ctx = |_: &[RecordBatch]| { @@ -2202,8 +2214,8 @@ mod tests { } #[tokio::test] - async fn should_return_stream_with_batches_in_the_requested_size_when_having_a_single_batch( - ) -> Result<()> { + async fn should_return_stream_with_batches_in_the_requested_size_when_having_a_single_batch() + -> Result<()> { let batch_size = 100; let create_task_ctx = |_: &[RecordBatch]| { @@ -2266,8 +2278,8 @@ mod tests { } #[tokio::test] - async fn should_return_stream_with_batches_in_the_requested_size_when_having_to_spill( - ) -> Result<()> { + async fn should_return_stream_with_batches_in_the_requested_size_when_having_to_spill() + -> Result<()> { let batch_size = 100; let create_task_ctx = |generated_batches: &[RecordBatch]| { diff --git a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs index 3361a7cdb718..e826f2f4aabb 100644 --- a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs +++ b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs @@ -23,16 +23,16 @@ use std::sync::Arc; use crate::common::spawn_buffered; use crate::limit::LimitStream; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; -use crate::projection::{make_with_child, update_ordering, ProjectionExec}; +use crate::projection::{ProjectionExec, make_with_child, update_ordering}; use crate::sorts::streaming_merge::StreamingMergeBuilder; use crate::{ DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, ExecutionPlanProperties, Partitioning, PlanProperties, SendableRecordBatchStream, Statistics, }; -use datafusion_common::{assert_eq_or_internal_err, internal_err, Result}; -use datafusion_execution::memory_pool::MemoryConsumer; +use datafusion_common::{Result, assert_eq_or_internal_err, internal_err}; use datafusion_execution::TaskContext; +use datafusion_execution::memory_pool::MemoryConsumer; use datafusion_physical_expr_common::sort_expr::{LexOrdering, OrderingRequirements}; use crate::execution_plan::{EvaluationType, SchedulingType}; @@ -304,7 +304,9 @@ impl ExecutionPlan for SortPreservingMergeExec { 1 => match self.fetch { Some(fetch) => { let stream = self.input.execute(0, context)?; - debug!("Done getting stream for SortPreservingMergeExec::execute with 1 input with {fetch}"); + debug!( + "Done getting stream for SortPreservingMergeExec::execute with 1 input with {fetch}" + ); Ok(Box::pin(LimitStream::new( stream, 0, @@ -314,7 +316,9 @@ impl ExecutionPlan for SortPreservingMergeExec { } None => { let stream = self.input.execute(0, context); - debug!("Done getting stream for SortPreservingMergeExec::execute with 1 input without fetch"); + debug!( + "Done getting stream for SortPreservingMergeExec::execute with 1 input without fetch" + ); stream } }, @@ -327,7 +331,9 @@ impl ExecutionPlan for SortPreservingMergeExec { }) .collect::>()?; - debug!("Done setting up sender-receiver for SortPreservingMergeExec::execute"); + debug!( + "Done setting up sender-receiver for SortPreservingMergeExec::execute" + ); let result = StreamingMergeBuilder::new() .with_streams(receivers) @@ -340,7 +346,9 @@ impl ExecutionPlan for SortPreservingMergeExec { .with_round_robin_tie_breaker(self.enable_round_robin_repartition) .build()?; - debug!("Got stream result from SortPreservingMergeStream::new_from_receivers"); + debug!( + "Got stream result from SortPreservingMergeStream::new_from_receivers" + ); Ok(result) } @@ -396,7 +404,7 @@ mod tests { use std::fmt::Formatter; use std::pin::Pin; use std::sync::Mutex; - use std::task::{ready, Context, Poll, Waker}; + use std::task::{Context, Poll, Waker, ready}; use std::time::Duration; use super::*; @@ -408,8 +416,8 @@ mod tests { use crate::repartition::RepartitionExec; use crate::sorts::sort::SortExec; use crate::stream::RecordBatchReceiverStream; - use crate::test::exec::{assert_strong_count_converges_to_zero, BlockingExec}; use crate::test::TestMemoryExec; + use crate::test::exec::{BlockingExec, assert_strong_count_converges_to_zero}; use crate::test::{self, assert_is_pending, make_partition}; use crate::{collect, common}; @@ -422,11 +430,11 @@ mod tests { use datafusion_common::test_util::batches_to_string; use datafusion_common::{assert_batches_eq, exec_err}; use datafusion_common_runtime::SpawnedTask; + use datafusion_execution::RecordBatchStream; use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::RuntimeEnvBuilder; - use datafusion_execution::RecordBatchStream; - use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::EquivalenceProperties; + use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; diff --git a/datafusion/physical-plan/src/sorts/stream.rs b/datafusion/physical-plan/src/sorts/stream.rs index b0c631cf9135..a510f44e4f4d 100644 --- a/datafusion/physical-plan/src/sorts/stream.rs +++ b/datafusion/physical-plan/src/sorts/stream.rs @@ -15,21 +15,21 @@ // specific language governing permissions and limitations // under the License. -use crate::sorts::cursor::{ArrayValues, CursorArray, RowValues}; use crate::SendableRecordBatchStream; +use crate::sorts::cursor::{ArrayValues, CursorArray, RowValues}; use crate::{PhysicalExpr, PhysicalSortExpr}; use arrow::array::Array; use arrow::datatypes::Schema; use arrow::record_batch::RecordBatch; use arrow::row::{RowConverter, Rows, SortField}; -use datafusion_common::{internal_datafusion_err, Result}; +use datafusion_common::{Result, internal_datafusion_err}; use datafusion_execution::memory_pool::MemoryReservation; use datafusion_physical_expr_common::sort_expr::LexOrdering; use datafusion_physical_expr_common::utils::evaluate_expressions_to_arrays; use futures::stream::{Fuse, StreamExt}; use std::marker::PhantomData; use std::sync::Arc; -use std::task::{ready, Context, Poll}; +use std::task::{Context, Poll, ready}; /// A [`Stream`](futures::Stream) that has multiple partitions that can /// be polled separately but not concurrently diff --git a/datafusion/physical-plan/src/sorts/streaming_merge.rs b/datafusion/physical-plan/src/sorts/streaming_merge.rs index 8003149a84f9..8129c3d8f695 100644 --- a/datafusion/physical-plan/src/sorts/streaming_merge.rs +++ b/datafusion/physical-plan/src/sorts/streaming_merge.rs @@ -28,7 +28,7 @@ use crate::{SendableRecordBatchStream, SpillManager}; use arrow::array::*; use arrow::datatypes::{DataType, SchemaRef}; use datafusion_common::human_readable_size; -use datafusion_common::{assert_or_internal_err, internal_err, Result}; +use datafusion_common::{Result, assert_or_internal_err, internal_err}; use datafusion_execution::disk_manager::RefCountedTempFile; use datafusion_execution::memory_pool::{ MemoryConsumer, MemoryPool, MemoryReservation, UnboundedMemoryPool, diff --git a/datafusion/physical-plan/src/spill/in_progress_spill_file.rs b/datafusion/physical-plan/src/spill/in_progress_spill_file.rs index e7f354a73b4c..d2acf4993b85 100644 --- a/datafusion/physical-plan/src/spill/in_progress_spill_file.rs +++ b/datafusion/physical-plan/src/spill/in_progress_spill_file.rs @@ -24,7 +24,7 @@ use arrow::array::RecordBatch; use datafusion_common::exec_datafusion_err; use datafusion_execution::disk_manager::RefCountedTempFile; -use super::{spill_manager::SpillManager, IPCStreamWriter}; +use super::{IPCStreamWriter, spill_manager::SpillManager}; /// Represents an in-progress spill file used for writing `RecordBatch`es to disk, created by `SpillManager`. /// Caller is able to use this struct to incrementally append in-memory batches to diff --git a/datafusion/physical-plan/src/spill/mod.rs b/datafusion/physical-plan/src/spill/mod.rs index 6be7edcf3291..1e7fa54a1b13 100644 --- a/datafusion/physical-plan/src/spill/mod.rs +++ b/datafusion/physical-plan/src/spill/mod.rs @@ -33,20 +33,20 @@ use std::ptr::NonNull; use std::sync::Arc; use std::task::{Context, Poll}; -use arrow::array::{layout, ArrayData, BufferSpec}; +use arrow::array::{ArrayData, BufferSpec, layout}; use arrow::datatypes::{Schema, SchemaRef}; use arrow::ipc::{ + MetadataVersion, reader::StreamReader, writer::{IpcWriteOptions, StreamWriter}, - MetadataVersion, }; use arrow::record_batch::RecordBatch; use datafusion_common::config::SpillCompression; -use datafusion_common::{exec_datafusion_err, DataFusionError, HashSet, Result}; +use datafusion_common::{DataFusionError, HashSet, Result, exec_datafusion_err}; use datafusion_common_runtime::SpawnedTask; -use datafusion_execution::disk_manager::RefCountedTempFile; use datafusion_execution::RecordBatchStream; +use datafusion_execution::disk_manager::RefCountedTempFile; use futures::{FutureExt as _, Stream}; use log::warn; @@ -154,11 +154,11 @@ impl SpillReaderStream { + SPILL_BATCH_MEMORY_MARGIN { warn!( - "Record batch memory usage ({actual_size} bytes) exceeds the expected limit ({max_record_batch_memory} bytes) \n\ + "Record batch memory usage ({actual_size} bytes) exceeds the expected limit ({max_record_batch_memory} bytes) \n\ by more than the allowed tolerance ({SPILL_BATCH_MEMORY_MARGIN} bytes).\n\ This likely indicates a bug in memory accounting during spilling.\n\ Please report this issue in https://github.com/apache/datafusion/issues/17340." - ); + ); } } self.state = SpillReaderStreamState::Waiting(reader); @@ -306,10 +306,10 @@ fn count_array_data_memory_size( } // Otherwise the buffer's memory is already counted } - if let Some(null_buffer) = array_data.nulls() { - if counted_buffers.insert(null_buffer.inner().inner().data_ptr()) { - *total_size += null_buffer.inner().inner().capacity(); - } + if let Some(null_buffer) = array_data.nulls() + && counted_buffers.insert(null_buffer.inner().inner().data_ptr()) + { + *total_size += null_buffer.inner().inner().capacity(); } // Count all children `ArrayData` recursively diff --git a/datafusion/physical-plan/src/spill/spill_manager.rs b/datafusion/physical-plan/src/spill/spill_manager.rs index 6fd97a8e2e6a..d4600673394b 100644 --- a/datafusion/physical-plan/src/spill/spill_manager.rs +++ b/datafusion/physical-plan/src/spill/spill_manager.rs @@ -23,11 +23,11 @@ use arrow::record_batch::RecordBatch; use datafusion_execution::runtime_env::RuntimeEnv; use std::sync::Arc; -use datafusion_common::{config::SpillCompression, Result}; -use datafusion_execution::disk_manager::RefCountedTempFile; +use datafusion_common::{Result, config::SpillCompression}; use datafusion_execution::SendableRecordBatchStream; +use datafusion_execution::disk_manager::RefCountedTempFile; -use super::{in_progress_spill_file::InProgressSpillFile, SpillReaderStream}; +use super::{SpillReaderStream, in_progress_spill_file::InProgressSpillFile}; use crate::coop::cooperative; use crate::{common::spawn_buffered, metrics::SpillMetrics}; diff --git a/datafusion/physical-plan/src/stream.rs b/datafusion/physical-plan/src/stream.rs index 480b723d0b15..8b2ea1006893 100644 --- a/datafusion/physical-plan/src/stream.rs +++ b/datafusion/physical-plan/src/stream.rs @@ -29,7 +29,7 @@ use super::{ExecutionPlan, RecordBatchStream, SendableRecordBatchStream}; use crate::displayable; use arrow::{datatypes::SchemaRef, record_batch::RecordBatch}; -use datafusion_common::{exec_err, Result}; +use datafusion_common::{Result, exec_err}; use datafusion_common_runtime::JoinSet; use datafusion_execution::TaskContext; @@ -703,7 +703,7 @@ impl RecordBatchStream for BatchSplitStream { mod test { use super::*; use crate::test::exec::{ - assert_strong_count_converges_to_zero, BlockingExec, MockExec, PanicExec, + BlockingExec, MockExec, PanicExec, assert_strong_count_converges_to_zero, }; use arrow::datatypes::{DataType, Field, Schema}; diff --git a/datafusion/physical-plan/src/streaming.rs b/datafusion/physical-plan/src/streaming.rs index f9a7feb9e726..c8b8d95718cb 100644 --- a/datafusion/physical-plan/src/streaming.rs +++ b/datafusion/physical-plan/src/streaming.rs @@ -23,18 +23,18 @@ use std::sync::Arc; use super::{DisplayAs, DisplayFormatType, PlanProperties}; use crate::coop::make_cooperative; -use crate::display::{display_orderings, ProjectSchemaDisplay}; +use crate::display::{ProjectSchemaDisplay, display_orderings}; use crate::execution_plan::{Boundedness, EmissionType, SchedulingType}; use crate::limit::LimitStream; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use crate::projection::{ - all_alias_free_columns, new_projections_for_columns, update_ordering, ProjectionExec, + ProjectionExec, all_alias_free_columns, new_projections_for_columns, update_ordering, }; use crate::stream::RecordBatchStreamAdapter; use crate::{ExecutionPlan, Partitioning, SendableRecordBatchStream}; use arrow::datatypes::{Schema, SchemaRef}; -use datafusion_common::{internal_err, plan_err, Result}; +use datafusion_common::{Result, internal_err, plan_err}; use datafusion_execution::TaskContext; use datafusion_physical_expr::{EquivalenceProperties, LexOrdering}; @@ -346,7 +346,7 @@ mod test { use super::*; use crate::collect_partitioned; use crate::streaming::PartitionStream; - use crate::test::{make_partition, TestPartitionStream}; + use crate::test::{TestPartitionStream, make_partition}; use arrow::record_batch::RecordBatch; #[tokio::test] diff --git a/datafusion/physical-plan/src/test.rs b/datafusion/physical-plan/src/test.rs index e3b22611f4de..f2336920b357 100644 --- a/datafusion/physical-plan/src/test.rs +++ b/datafusion/physical-plan/src/test.rs @@ -25,19 +25,19 @@ use std::pin::Pin; use std::sync::Arc; use std::task::Context; +use crate::ExecutionPlan; use crate::common; use crate::execution_plan::{Boundedness, EmissionType}; use crate::memory::MemoryStream; use crate::metrics::MetricsSet; use crate::stream::RecordBatchStreamAdapter; use crate::streaming::PartitionStream; -use crate::ExecutionPlan; use crate::{DisplayAs, DisplayFormatType, PlanProperties}; use arrow::array::{Array, ArrayRef, Int32Array, RecordBatch}; use arrow_schema::{DataType, Field, Schema, SchemaRef}; use datafusion_common::{ - assert_or_internal_err, config::ConfigOptions, project_schema, Result, Statistics, + Result, Statistics, assert_or_internal_err, config::ConfigOptions, project_schema, }; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_physical_expr::equivalence::{ @@ -105,10 +105,10 @@ impl DisplayAs for TestMemoryExec { .map_or(String::new(), |limit| format!(", fetch={limit}")); if self.show_sizes { write!( - f, - "partitions={}, partition_sizes={partition_sizes:?}{limit}{output_ordering}{constraints}", - partition_sizes.len(), - ) + f, + "partitions={}, partition_sizes={partition_sizes:?}{limit}{output_ordering}{constraints}", + partition_sizes.len(), + ) } else { write!( f, diff --git a/datafusion/physical-plan/src/test/exec.rs b/datafusion/physical-plan/src/test/exec.rs index b720181b27fe..4507cccba05a 100644 --- a/datafusion/physical-plan/src/test/exec.rs +++ b/datafusion/physical-plan/src/test/exec.rs @@ -25,9 +25,9 @@ use std::{ }; use crate::{ - common, execution_plan::Boundedness, DisplayAs, DisplayFormatType, ExecutionPlan, - Partitioning, PlanProperties, RecordBatchStream, SendableRecordBatchStream, - Statistics, + DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties, + RecordBatchStream, SendableRecordBatchStream, Statistics, common, + execution_plan::Boundedness, }; use crate::{ execution_plan::EmissionType, @@ -36,7 +36,7 @@ use crate::{ use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use arrow::record_batch::RecordBatch; -use datafusion_common::{internal_err, DataFusionError, Result}; +use datafusion_common::{DataFusionError, Result, internal_err}; use datafusion_execution::TaskContext; use datafusion_physical_expr::EquivalenceProperties; @@ -522,8 +522,8 @@ pub struct StatisticsExec { impl StatisticsExec { pub fn new(stats: Statistics, schema: Schema) -> Self { assert_eq!( - stats - .column_statistics.len(), schema.fields().len(), + stats.column_statistics.len(), + schema.fields().len(), "if defined, the column statistics vector length should be the number of fields" ); let cache = Self::compute_properties(Arc::new(schema.clone())); diff --git a/datafusion/physical-plan/src/topk/mod.rs b/datafusion/physical-plan/src/topk/mod.rs index 99af9b8f7ca1..ebac497f4fbc 100644 --- a/datafusion/physical-plan/src/topk/mod.rs +++ b/datafusion/physical-plan/src/topk/mod.rs @@ -19,7 +19,7 @@ use arrow::{ array::{Array, AsArray}, - compute::{interleave_record_batch, prep_null_mask_filter, FilterBuilder}, + compute::{FilterBuilder, interleave_record_batch, prep_null_mask_filter}, row::{RowConverter, Rows, SortField}, }; use datafusion_expr::{ColumnarValue, Operator}; @@ -30,20 +30,20 @@ use super::metrics::{ BaselineMetrics, Count, ExecutionPlanMetricsSet, MetricBuilder, RecordOutput, }; use crate::spill::get_record_batch_memory_size; -use crate::{stream::RecordBatchStreamAdapter, SendableRecordBatchStream}; +use crate::{SendableRecordBatchStream, stream::RecordBatchStreamAdapter}; use arrow::array::{ArrayRef, RecordBatch}; use arrow::datatypes::SchemaRef; use datafusion_common::{ - internal_datafusion_err, internal_err, HashMap, Result, ScalarValue, + HashMap, Result, ScalarValue, internal_datafusion_err, internal_err, }; use datafusion_execution::{ memory_pool::{MemoryConsumer, MemoryReservation}, runtime_env::RuntimeEnv, }; use datafusion_physical_expr::{ - expressions::{is_not_null, is_null, lit, BinaryExpr, DynamicFilterPhysicalExpr}, PhysicalExpr, + expressions::{BinaryExpr, DynamicFilterPhysicalExpr, is_not_null, is_null, lit}, }; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; use parking_lot::RwLock; @@ -409,10 +409,10 @@ impl TopK { }; // Update the filter expression - if let Some(pred) = predicate { - if !pred.eq(&lit(true)) { - filter.expr.update(pred)?; - } + if let Some(pred) = predicate + && !pred.eq(&lit(true)) + { + filter.expr.update(pred)?; } Ok(()) @@ -870,7 +870,7 @@ impl TopKHeap { ScalarValue::try_from_array(&array, 0)? } array => { - return internal_err!("Expected a scalar value, got {:?}", array) + return internal_err!("Expected a scalar value, got {:?}", array); } }; diff --git a/datafusion/physical-plan/src/tree_node.rs b/datafusion/physical-plan/src/tree_node.rs index 85d7b33575ca..aa4f144f9189 100644 --- a/datafusion/physical-plan/src/tree_node.rs +++ b/datafusion/physical-plan/src/tree_node.rs @@ -20,10 +20,10 @@ use std::fmt::{self, Display, Formatter}; use std::sync::Arc; -use crate::{displayable, with_new_children_if_necessary, ExecutionPlan}; +use crate::{ExecutionPlan, displayable, with_new_children_if_necessary}; -use datafusion_common::tree_node::{ConcreteTreeNode, DynTreeNode}; use datafusion_common::Result; +use datafusion_common::tree_node::{ConcreteTreeNode, DynTreeNode}; impl DynTreeNode for dyn ExecutionPlan { fn arc_children(&self) -> Vec<&Arc> { diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index 06c28a8081ef..0c5b78c6ca68 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -27,18 +27,18 @@ use std::task::{Context, Poll}; use std::{any::Any, sync::Arc}; use super::{ - metrics::{ExecutionPlanMetricsSet, MetricsSet}, ColumnStatistics, DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, Partitioning, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, + metrics::{ExecutionPlanMetricsSet, MetricsSet}, }; use crate::execution_plan::{ - boundedness_from_children, check_default_invariants, emission_type_from_children, - InvariantLevel, + InvariantLevel, boundedness_from_children, check_default_invariants, + emission_type_from_children, }; use crate::filter_pushdown::{FilterDescription, FilterPushdownPhase}; use crate::metrics::BaselineMetrics; -use crate::projection::{make_with_child, ProjectionExec}; +use crate::projection::{ProjectionExec, make_with_child}; use crate::stream::ObservedStream; use arrow::datatypes::{Field, Schema, SchemaRef}; @@ -46,10 +46,10 @@ use arrow::record_batch::RecordBatch; use datafusion_common::config::ConfigOptions; use datafusion_common::stats::Precision; use datafusion_common::{ - assert_or_internal_err, exec_err, internal_datafusion_err, Result, + Result, assert_or_internal_err, exec_err, internal_datafusion_err, }; use datafusion_execution::TaskContext; -use datafusion_physical_expr::{calculate_union, EquivalenceProperties, PhysicalExpr}; +use datafusion_physical_expr::{EquivalenceProperties, PhysicalExpr, calculate_union}; use futures::Stream; use itertools::Itertools; @@ -267,7 +267,12 @@ impl ExecutionPlan for UnionExec { mut partition: usize, context: Arc, ) -> Result { - trace!("Start UnionExec::execute for partition {} of context session_id {} and task_id {:?}", partition, context.session_id(), context.task_id()); + trace!( + "Start UnionExec::execute for partition {} of context session_id {} and task_id {:?}", + partition, + context.session_id(), + context.task_id() + ); let baseline_metrics = BaselineMetrics::new(&self.metrics, partition); // record the tiny amount of work done in this function so // elapsed_compute is reported as non zero @@ -498,7 +503,12 @@ impl ExecutionPlan for InterleaveExec { partition: usize, context: Arc, ) -> Result { - trace!("Start InterleaveExec::execute for partition {} of context session_id {} and task_id {:?}", partition, context.session_id(), context.task_id()); + trace!( + "Start InterleaveExec::execute for partition {} of context session_id {} and task_id {:?}", + partition, + context.session_id(), + context.task_id() + ); let baseline_metrics = BaselineMetrics::new(&self.metrics, partition); // record the tiny amount of work done in this function so // elapsed_compute is reported as non zero @@ -591,7 +601,8 @@ fn union_schema(inputs: &[Arc]) -> Result { let base_field = first_schema.field(i).clone(); // Coerce metadata and nullability across all inputs - let merged_field = inputs + + inputs .iter() .enumerate() .map(|(input_idx, input)| { @@ -613,9 +624,7 @@ fn union_schema(inputs: &[Arc]) -> Result { // We can unwrap this because if inputs was empty, this would've already panic'ed when we // indexed into inputs[0]. .unwrap() - .with_name(base_field.name()); - - merged_field + .with_name(base_field.name()) }) .collect::>(); @@ -973,20 +982,24 @@ mod tests { fn test_union_empty_inputs() { // Test that UnionExec::try_new fails with empty inputs let result = UnionExec::try_new(vec![]); - assert!(result - .unwrap_err() - .to_string() - .contains("UnionExec requires at least one input")); + assert!( + result + .unwrap_err() + .to_string() + .contains("UnionExec requires at least one input") + ); } #[test] fn test_union_schema_empty_inputs() { // Test that union_schema fails with empty inputs let result = union_schema(&[]); - assert!(result - .unwrap_err() - .to_string() - .contains("Cannot create union schema from empty inputs")); + assert!( + result + .unwrap_err() + .to_string() + .contains("Cannot create union schema from empty inputs") + ); } #[test] diff --git a/datafusion/physical-plan/src/unnest.rs b/datafusion/physical-plan/src/unnest.rs index 3c999b1a40c1..8184c9d2c520 100644 --- a/datafusion/physical-plan/src/unnest.rs +++ b/datafusion/physical-plan/src/unnest.rs @@ -18,7 +18,7 @@ //! Define a plan for unnesting values in columns that contain a list type. use std::cmp::{self, Ordering}; -use std::task::{ready, Poll}; +use std::task::{Poll, ready}; use std::{any::Any, sync::Arc}; use super::metrics::{ @@ -32,8 +32,8 @@ use crate::{ }; use arrow::array::{ - new_null_array, Array, ArrayRef, AsArray, BooleanBufferBuilder, FixedSizeListArray, - Int64Array, LargeListArray, ListArray, PrimitiveArray, Scalar, StructArray, + Array, ArrayRef, AsArray, BooleanBufferBuilder, FixedSizeListArray, Int64Array, + LargeListArray, ListArray, PrimitiveArray, Scalar, StructArray, new_null_array, }; use arrow::compute::kernels::length::length; use arrow::compute::kernels::zip::zip; @@ -43,13 +43,13 @@ use arrow::record_batch::RecordBatch; use arrow_ord::cmp::lt; use async_trait::async_trait; use datafusion_common::{ - exec_datafusion_err, exec_err, internal_err, Constraints, HashMap, HashSet, Result, - UnnestOptions, + Constraints, HashMap, HashSet, Result, UnnestOptions, exec_datafusion_err, exec_err, + internal_err, }; use datafusion_execution::TaskContext; +use datafusion_physical_expr::PhysicalExpr; use datafusion_physical_expr::equivalence::ProjectionMapping; use datafusion_physical_expr::expressions::Column; -use datafusion_physical_expr::PhysicalExpr; use futures::{Stream, StreamExt}; use log::trace; diff --git a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs index eb8bb982b882..302dd38380e1 100644 --- a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs @@ -21,7 +21,7 @@ //! infinite inputs. use std::any::Any; -use std::cmp::{min, Ordering}; +use std::cmp::{Ordering, min}; use std::collections::VecDeque; use std::pin::Pin; use std::sync::Arc; @@ -52,11 +52,11 @@ use datafusion_common::utils::{ evaluate_partition_ranges, get_at_indices, get_row_at_idx, }; use datafusion_common::{ - arrow_datafusion_err, exec_datafusion_err, exec_err, HashMap, Result, + HashMap, Result, arrow_datafusion_err, exec_datafusion_err, exec_err, }; use datafusion_execution::TaskContext; -use datafusion_expr::window_state::{PartitionBatchState, WindowAggState}; use datafusion_expr::ColumnarValue; +use datafusion_expr::window_state::{PartitionBatchState, WindowAggState}; use datafusion_physical_expr::window::{ PartitionBatches, PartitionKey, PartitionWindowAggStates, WindowState, }; @@ -67,7 +67,7 @@ use datafusion_physical_expr_common::sort_expr::{ use ahash::RandomState; use futures::stream::Stream; -use futures::{ready, StreamExt}; +use futures::{StreamExt, ready}; use hashbrown::hash_table::HashTable; use indexmap::IndexMap; use log::debug; @@ -175,7 +175,9 @@ impl BoundedWindowAggExec { if self.window_expr()[0].partition_by().len() != ordered_partition_by_indices.len() { - return exec_err!("All partition by columns should have an ordering in Sorted mode."); + return exec_err!( + "All partition by columns should have an ordering in Sorted mode." + ); } Box::new(SortedSearch { partition_by_sort_keys, @@ -627,23 +629,23 @@ impl PartitionSearcher for LinearSearch { fn mark_partition_end(&self, partition_buffers: &mut PartitionBatches) { // We should be in the `PartiallySorted` case, otherwise we can not // tell when we are at the end of a given partition. - if !self.ordered_partition_by_indices.is_empty() { - if let Some((last_row, _)) = partition_buffers.last() { - let last_sorted_cols = self + if !self.ordered_partition_by_indices.is_empty() + && let Some((last_row, _)) = partition_buffers.last() + { + let last_sorted_cols = self + .ordered_partition_by_indices + .iter() + .map(|idx| last_row[*idx].clone()) + .collect::>(); + for (row, partition_batch_state) in partition_buffers.iter_mut() { + let sorted_cols = self .ordered_partition_by_indices .iter() - .map(|idx| last_row[*idx].clone()) - .collect::>(); - for (row, partition_batch_state) in partition_buffers.iter_mut() { - let sorted_cols = self - .ordered_partition_by_indices - .iter() - .map(|idx| &row[*idx]); - // All the partitions other than `last_sorted_cols` are done. - // We are sure that we will no longer receive values for these - // partitions (arrival of a new value would violate ordering). - partition_batch_state.is_end = !sorted_cols.eq(&last_sorted_cols); - } + .map(|idx| &row[*idx]); + // All the partitions other than `last_sorted_cols` are done. + // We are sure that we will no longer receive values for these + // partitions (arrival of a new value would violate ordering). + partition_batch_state.is_end = !sorted_cols.eq(&last_sorted_cols); } } } @@ -1247,18 +1249,18 @@ mod tests { use crate::streaming::{PartitionStream, StreamingTableExec}; use crate::test::TestMemoryExec; use crate::windows::{ - create_udwf_window_expr, create_window_expr, BoundedWindowAggExec, InputOrderMode, + BoundedWindowAggExec, InputOrderMode, create_udwf_window_expr, create_window_expr, }; - use crate::{displayable, execute_stream, ExecutionPlan}; + use crate::{ExecutionPlan, displayable, execute_stream}; use arrow::array::{ - builder::{Int64Builder, UInt64Builder}, RecordBatch, + builder::{Int64Builder, UInt64Builder}, }; use arrow::compute::SortOptions; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion_common::test_util::batches_to_string; - use datafusion_common::{exec_datafusion_err, Result, ScalarValue}; + use datafusion_common::{Result, ScalarValue, exec_datafusion_err}; use datafusion_execution::config::SessionConfig; use datafusion_execution::{ RecordBatchStream, SendableRecordBatchStream, TaskContext, @@ -1269,12 +1271,12 @@ mod tests { use datafusion_functions_aggregate::count::count_udaf; use datafusion_functions_window::nth_value::last_value_udwf; use datafusion_functions_window::nth_value::nth_value_udwf; - use datafusion_physical_expr::expressions::{col, Column, Literal}; + use datafusion_physical_expr::expressions::{Column, Literal, col}; use datafusion_physical_expr::window::StandardWindowExpr; use datafusion_physical_expr::{LexOrdering, PhysicalExpr}; use futures::future::Shared; - use futures::{pin_mut, ready, FutureExt, Stream, StreamExt}; + use futures::{FutureExt, Stream, StreamExt, pin_mut, ready}; use insta::assert_snapshot; use itertools::Itertools; use tokio::time::timeout; @@ -1482,14 +1484,16 @@ mod tests { } fn schema_orders(schema: &SchemaRef) -> Result> { - let orderings = vec![[PhysicalSortExpr { - expr: col("sn", schema)?, - options: SortOptions { - descending: false, - nulls_first: false, - }, - }] - .into()]; + let orderings = vec![ + [PhysicalSortExpr { + expr: col("sn", schema)?, + options: SortOptions { + descending: false, + nulls_first: false, + }, + }] + .into(), + ]; Ok(orderings) } diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 01926ab156c7..d0e1eab09987 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -25,13 +25,13 @@ use std::borrow::Borrow; use std::sync::Arc; use crate::{ - expressions::PhysicalSortExpr, ExecutionPlan, ExecutionPlanProperties, - InputOrderMode, PhysicalExpr, + ExecutionPlan, ExecutionPlanProperties, InputOrderMode, PhysicalExpr, + expressions::PhysicalSortExpr, }; use arrow::datatypes::{Schema, SchemaRef}; use arrow_schema::{FieldRef, SortOptions}; -use datafusion_common::{exec_err, Result}; +use datafusion_common::{Result, exec_err}; use datafusion_expr::{ LimitEffect, PartitionEvaluator, ReversedUDWF, SetMonotonicity, WindowFrame, WindowFunctionDefinition, WindowUDF, @@ -389,11 +389,11 @@ pub(crate) fn window_equivalence_properties( let mut found = false; for sort_expr in sort_options.into_iter() { candidate_ordering.push(sort_expr); - if let Some(lex) = LexOrdering::new(candidate_ordering.clone()) { - if window_eq_properties.ordering_satisfy(lex)? { - found = true; - break; - } + if let Some(lex) = LexOrdering::new(candidate_ordering.clone()) + && window_eq_properties.ordering_satisfy(lex)? + { + found = true; + break; } // This option didn't work, remove it and try the next one candidate_ordering.pop(); @@ -407,10 +407,10 @@ pub(crate) fn window_equivalence_properties( // If we successfully built an ordering for all columns, use it // When there are no partition expressions, candidate_ordering will be empty and won't be added - if candidate_ordering.len() == partitioning_exprs.len() { - if let Some(lex) = LexOrdering::new(candidate_ordering) { - all_satisfied_lexs.push(lex); - } + if candidate_ordering.len() == partitioning_exprs.len() + && let Some(lex) = LexOrdering::new(candidate_ordering) + { + all_satisfied_lexs.push(lex); } // If there is a partitioning, and no possible ordering cannot satisfy // the input plan's orderings, then we cannot further introduce any @@ -512,21 +512,21 @@ pub(crate) fn window_equivalence_properties( let is_asc = !sort_expr.options.descending; candidate_order.push(sort_expr); - if let Some(lex) = LexOrdering::new(candidate_order.clone()) { - if window_eq_properties.ordering_satisfy(lex)? { - if idx == 0 { - // The first column's ordering direction determines the overall - // monotonicity behavior of the window result. - // - If the aggregate has increasing set monotonicity (e.g., MAX, COUNT) - // and the first arg is ascending, the window result is increasing - // - If the aggregate has decreasing set monotonicity (e.g., MIN) - // and the first arg is ascending, the window result is also increasing - // This flag is used to determine the final window column ordering. - asc = is_asc; - } - found = true; - break; + if let Some(lex) = LexOrdering::new(candidate_order.clone()) + && window_eq_properties.ordering_satisfy(lex)? + { + if idx == 0 { + // The first column's ordering direction determines the overall + // monotonicity behavior of the window result. + // - If the aggregate has increasing set monotonicity (e.g., MAX, COUNT) + // and the first arg is ascending, the window result is increasing + // - If the aggregate has decreasing set monotonicity (e.g., MIN) + // and the first arg is ascending, the window result is also increasing + // This flag is used to determine the final window column ordering. + asc = is_asc; } + found = true; + break; } // This option didn't work, remove it and try the next one candidate_order.pop(); @@ -740,13 +740,13 @@ mod tests { use crate::expressions::col; use crate::streaming::StreamingTableExec; use crate::test::assert_is_pending; - use crate::test::exec::{assert_strong_count_converges_to_zero, BlockingExec}; + use crate::test::exec::{BlockingExec, assert_strong_count_converges_to_zero}; + use InputOrderMode::{Linear, PartiallySorted, Sorted}; use arrow::compute::SortOptions; use arrow_schema::{DataType, Field}; use datafusion_execution::TaskContext; use datafusion_functions_aggregate::count::count_udaf; - use InputOrderMode::{Linear, PartiallySorted, Sorted}; use futures::FutureExt; diff --git a/datafusion/physical-plan/src/windows/window_agg_exec.rs b/datafusion/physical-plan/src/windows/window_agg_exec.rs index b588608397f4..d6d5f4fdd2a6 100644 --- a/datafusion/physical-plan/src/windows/window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/window_agg_exec.rs @@ -42,13 +42,13 @@ use arrow::error::ArrowError; use arrow::record_batch::RecordBatch; use datafusion_common::stats::Precision; use datafusion_common::utils::{evaluate_partition_ranges, transpose}; -use datafusion_common::{assert_eq_or_internal_err, Result}; +use datafusion_common::{Result, assert_eq_or_internal_err}; use datafusion_execution::TaskContext; use datafusion_physical_expr_common::sort_expr::{ OrderingRequirements, PhysicalSortExpr, }; -use futures::{ready, Stream, StreamExt}; +use futures::{Stream, StreamExt, ready}; /// Window execution plan #[derive(Debug, Clone)] diff --git a/datafusion/physical-plan/src/work_table.rs b/datafusion/physical-plan/src/work_table.rs index b0f300dd371f..ba7c98c26480 100644 --- a/datafusion/physical-plan/src/work_table.rs +++ b/datafusion/physical-plan/src/work_table.rs @@ -31,9 +31,9 @@ use crate::{ use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; -use datafusion_common::{assert_eq_or_internal_err, internal_datafusion_err, Result}; -use datafusion_execution::memory_pool::MemoryReservation; +use datafusion_common::{Result, assert_eq_or_internal_err, internal_datafusion_err}; use datafusion_execution::TaskContext; +use datafusion_execution::memory_pool::MemoryReservation; use datafusion_physical_expr::{EquivalenceProperties, Partitioning}; /// A vector of record batches with a memory reservation. diff --git a/datafusion/session/Cargo.toml b/datafusion/session/Cargo.toml index 230e26d1fc9f..3bed85142510 100644 --- a/datafusion/session/Cargo.toml +++ b/datafusion/session/Cargo.toml @@ -20,7 +20,7 @@ name = "datafusion-session" description = "datafusion-session" readme = "README.md" authors.workspace = true -edition.workspace = true +edition = "2024" homepage.workspace = true license.workspace = true repository.workspace = true diff --git a/datafusion/session/src/session.rs b/datafusion/session/src/session.rs index 14757da67d84..2593e8cd71f4 100644 --- a/datafusion/session/src/session.rs +++ b/datafusion/session/src/session.rs @@ -18,9 +18,9 @@ use async_trait::async_trait; use datafusion_common::config::{ConfigOptions, TableOptions}; use datafusion_common::{DFSchema, Result}; +use datafusion_execution::TaskContext; use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::RuntimeEnv; -use datafusion_execution::TaskContext; use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::{AggregateUDF, Expr, LogicalPlan, ScalarUDF, WindowUDF}; use datafusion_physical_plan::{ExecutionPlan, PhysicalExpr}; diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml index a814292a3d71..f7afbe9c3111 100644 --- a/datafusion/sql/Cargo.toml +++ b/datafusion/sql/Cargo.toml @@ -21,7 +21,7 @@ description = "DataFusion SQL Query Planner" keywords = ["datafusion", "sql", "parser", "planner"] readme = "README.md" version = { workspace = true } -edition = { workspace = true } +edition = "2024" homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } diff --git a/datafusion/sql/examples/sql.rs b/datafusion/sql/examples/sql.rs index 2c0bb86cd808..dbedaf3f15b8 100644 --- a/datafusion/sql/examples/sql.rs +++ b/datafusion/sql/examples/sql.rs @@ -20,11 +20,11 @@ use std::{collections::HashMap, sync::Arc}; use arrow::datatypes::{DataType, Field, Schema}; use datafusion_common::config::ConfigOptions; -use datafusion_common::{plan_err, Result, TableReference}; -use datafusion_expr::planner::ExprPlanner; +use datafusion_common::{Result, TableReference, plan_err}; use datafusion_expr::WindowUDF; +use datafusion_expr::planner::ExprPlanner; use datafusion_expr::{ - logical_plan::builder::LogicalTableSource, AggregateUDF, ScalarUDF, TableSource, + AggregateUDF, ScalarUDF, TableSource, logical_plan::builder::LogicalTableSource, }; use datafusion_functions::core::planner::CoreFunctionPlanner; use datafusion_functions_aggregate::count::count_udaf; diff --git a/datafusion/sql/src/cte.rs b/datafusion/sql/src/cte.rs index 6959ce17abef..18766d705635 100644 --- a/datafusion/sql/src/cte.rs +++ b/datafusion/sql/src/cte.rs @@ -20,9 +20,8 @@ use std::sync::Arc; use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use datafusion_common::{ - not_impl_err, plan_err, + Result, not_impl_err, plan_err, tree_node::{TreeNode, TreeNodeRecursion}, - Result, }; use datafusion_expr::{LogicalPlan, LogicalPlanBuilder, TableSource}; use sqlparser::ast::{Query, SetExpr, SetOperator, With}; @@ -191,11 +190,11 @@ fn has_work_table_reference( ) -> bool { let mut has_reference = false; plan.apply(|node| { - if let LogicalPlan::TableScan(scan) = node { - if Arc::ptr_eq(&scan.source, work_table_source) { - has_reference = true; - return Ok(TreeNodeRecursion::Stop); - } + if let LogicalPlan::TableScan(scan) = node + && Arc::ptr_eq(&scan.source, work_table_source) + { + has_reference = true; + return Ok(TreeNodeRecursion::Stop); } Ok(TreeNodeRecursion::Continue) }) diff --git a/datafusion/sql/src/expr/binary_op.rs b/datafusion/sql/src/expr/binary_op.rs index f0ca54161782..edad5bbc6daa 100644 --- a/datafusion/sql/src/expr/binary_op.rs +++ b/datafusion/sql/src/expr/binary_op.rs @@ -16,7 +16,7 @@ // under the License. use crate::planner::{ContextProvider, SqlToRel}; -use datafusion_common::{not_impl_err, Result}; +use datafusion_common::{Result, not_impl_err}; use datafusion_expr::Operator; use sqlparser::ast::BinaryOperator; diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index badeab46c837..b29531fdeaf4 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -19,14 +19,13 @@ use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use arrow::datatypes::DataType; use datafusion_common::{ - internal_datafusion_err, internal_err, not_impl_err, plan_datafusion_err, plan_err, - DFSchema, Dependency, Diagnostic, Result, Span, + DFSchema, Dependency, Diagnostic, Result, Span, internal_datafusion_err, + internal_err, not_impl_err, plan_datafusion_err, plan_err, }; use datafusion_expr::{ - expr, + Expr, ExprSchemable, SortExpr, WindowFrame, WindowFunctionDefinition, expr, expr::{NullTreatment, ScalarFunction, Unnest, WildcardOptions, WindowFunction}, planner::{PlannerResult, RawAggregateExpr, RawWindowExpr}, - Expr, ExprSchemable, SortExpr, WindowFrame, WindowFunctionDefinition, }; use sqlparser::ast::{ DuplicateTreatment, Expr as SQLExpr, Function as SQLFunction, FunctionArg, @@ -152,42 +151,46 @@ impl FunctionArgs { FunctionArgumentClause::OrderBy(oby) => { if order_by.is_some() { if !within_group.is_empty() { - return plan_err!("ORDER BY clause is only permitted in WITHIN GROUP clause when a WITHIN GROUP is used"); + return plan_err!( + "ORDER BY clause is only permitted in WITHIN GROUP clause when a WITHIN GROUP is used" + ); } - return not_impl_err!("Calling {name}: Duplicated ORDER BY clause in function arguments"); + return not_impl_err!( + "Calling {name}: Duplicated ORDER BY clause in function arguments" + ); } order_by = Some(oby); } FunctionArgumentClause::Limit(limit) => { return not_impl_err!( "Calling {name}: LIMIT not supported in function arguments: {limit}" - ) + ); } FunctionArgumentClause::OnOverflow(overflow) => { return not_impl_err!( "Calling {name}: ON OVERFLOW not supported in function arguments: {overflow}" - ) + ); } FunctionArgumentClause::Having(having) => { return not_impl_err!( "Calling {name}: HAVING not supported in function arguments: {having}" - ) + ); } FunctionArgumentClause::Separator(sep) => { return not_impl_err!( "Calling {name}: SEPARATOR not supported in function arguments: {sep}" - ) + ); } FunctionArgumentClause::JsonNullClause(jn) => { return not_impl_err!( "Calling {name}: JSON NULL clause not supported in function arguments: {jn}" - ) + ); } FunctionArgumentClause::JsonReturningClause(jr) => { return not_impl_err!( "Calling {name}: JSON RETURNING clause not supported in function arguments: {jr}" - ) - }, + ); + } } } @@ -237,12 +240,16 @@ impl SqlToRel<'_, S> { } = function_args; if over.is_some() && !within_group.is_empty() { - return plan_err!("OVER and WITHIN GROUP clause cannot be used together. \ - OVER is for window functions, whereas WITHIN GROUP is for ordered set aggregate functions"); + return plan_err!( + "OVER and WITHIN GROUP clause cannot be used together. \ + OVER is for window functions, whereas WITHIN GROUP is for ordered set aggregate functions" + ); } if !order_by.is_empty() && !within_group.is_empty() { - return plan_err!("ORDER BY and WITHIN GROUP clauses cannot be used together in the same aggregate function"); + return plan_err!( + "ORDER BY and WITHIN GROUP clauses cannot be used together in the same aggregate function" + ); } // If function is a window function (it has an OVER clause), @@ -261,7 +268,7 @@ impl SqlToRel<'_, S> { return plan_err!( "Expected an identifier in function name, but found {:?}", object_name.0[0] - ) + ); } } }; diff --git a/datafusion/sql/src/expr/identifier.rs b/datafusion/sql/src/expr/identifier.rs index 79f65e3f59b3..4c23c7a818be 100644 --- a/datafusion/sql/src/expr/identifier.rs +++ b/datafusion/sql/src/expr/identifier.rs @@ -18,8 +18,8 @@ use arrow::datatypes::FieldRef; use datafusion_common::datatype::DataTypeExt; use datafusion_common::{ - assert_or_internal_err, exec_datafusion_err, internal_err, not_impl_err, - plan_datafusion_err, plan_err, Column, DFSchema, Result, Span, TableReference, + Column, DFSchema, Result, Span, TableReference, assert_or_internal_err, + exec_datafusion_err, internal_err, not_impl_err, plan_datafusion_err, plan_err, }; use datafusion_expr::planner::PlannerResult; use datafusion_expr::{Case, Expr}; @@ -67,33 +67,32 @@ impl SqlToRel<'_, S> { qualifier.filter(|q| q.table() != UNNAMED_TABLE).cloned(), normalize_ident, ); - if self.options.collect_spans { - if let Some(span) = Span::try_from_sqlparser_span(id_span) { - column.spans_mut().add_span(span); - } + if self.options.collect_spans + && let Some(span) = Span::try_from_sqlparser_span(id_span) + { + column.spans_mut().add_span(span); } return Ok(Expr::Column(column)); } // Check the outer query schema - if let Some(outer) = planner_context.outer_query_schema() { - if let Ok((qualifier, field)) = + if let Some(outer) = planner_context.outer_query_schema() + && let Ok((qualifier, field)) = outer.qualified_field_with_unqualified_name(normalize_ident.as_str()) - { - // Found an exact match on a qualified name in the outer plan schema, so this is an outer reference column - return Ok(Expr::OuterReferenceColumn( - Arc::clone(field), - Column::from((qualifier, field)), - )); - } + { + // Found an exact match on a qualified name in the outer plan schema, so this is an outer reference column + return Ok(Expr::OuterReferenceColumn( + Arc::clone(field), + Column::from((qualifier, field)), + )); } // Default case let mut column = Column::new_unqualified(normalize_ident); - if self.options.collect_spans { - if let Some(span) = Span::try_from_sqlparser_span(id_span) { - column.spans_mut().add_span(span); - } + if self.options.collect_spans + && let Some(span) = Span::try_from_sqlparser_span(id_span) + { + column.spans_mut().add_span(span); } Ok(Expr::Column(column)) } @@ -159,10 +158,10 @@ impl SqlToRel<'_, S> { // Found matching field with no spare identifier(s) Some((field, qualifier, _nested_names)) => { let mut column = Column::from((qualifier, field)); - if self.options.collect_spans { - if let Some(span) = ids_span { - column.spans_mut().add_span(span); - } + if self.options.collect_spans + && let Some(span) = ids_span + { + column.spans_mut().add_span(span); } Ok(Expr::Column(column)) } @@ -183,7 +182,8 @@ impl SqlToRel<'_, S> { // TODO: remove when can support nested identifiers for OuterReferenceColumn not_impl_err!( "Nested identifiers are not yet supported for OuterReferenceColumn {}", - Column::from((qualifier, field)).quoted_flat_name() + Column::from((qualifier, field)) + .quoted_flat_name() ) } // Found matching field with no spare identifier(s) @@ -208,10 +208,10 @@ impl SqlToRel<'_, S> { // Safe unwrap as s can never be empty or exceed the bounds let (relation, column_name) = form_identifier(s).unwrap(); let mut column = Column::new(relation, column_name); - if self.options.collect_spans { - if let Some(span) = ids_span { - column.spans_mut().add_span(span); - } + if self.options.collect_spans + && let Some(span) = ids_span + { + column.spans_mut().add_span(span); } Ok(Expr::Column(column)) } diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 9725025d599f..83893e82c26c 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -27,15 +27,15 @@ use sqlparser::ast::{ }; use datafusion_common::{ - internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema, Result, - ScalarValue, + DFSchema, Result, ScalarValue, internal_datafusion_err, internal_err, not_impl_err, + plan_err, }; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::expr::{InList, WildcardOptions}; use datafusion_expr::{ - lit, Between, BinaryExpr, Cast, Expr, ExprSchemable, GetFieldAccess, Like, Literal, - Operator, TryCast, + Between, BinaryExpr, Cast, Expr, ExprSchemable, GetFieldAccess, Like, Literal, + Operator, TryCast, lit, }; use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; @@ -577,7 +577,7 @@ impl SqlToRel<'_, S> { _ => { return not_impl_err!( "Unsupported ast node in sqltorel: {time_zone:?}" - ) + ); } }, ))), @@ -855,7 +855,11 @@ impl SqlToRel<'_, S> { Some(Value::SingleQuotedString(char)) if char.len() == 1 => { Some(char.chars().next().unwrap()) } - Some(value) => return plan_err!("Invalid escape character in LIKE expression. Expected a single character wrapped with single quotes, got {value}"), + Some(value) => { + return plan_err!( + "Invalid escape character in LIKE expression. Expected a single character wrapped with single quotes, got {value}" + ); + } None => None, }; Ok(Expr::Like(Like::new( @@ -885,7 +889,11 @@ impl SqlToRel<'_, S> { Some(Value::SingleQuotedString(char)) if char.len() == 1 => { Some(char.chars().next().unwrap()) } - Some(value) => return plan_err!("Invalid escape character in SIMILAR TO expression. Expected a single character wrapped with single quotes, got {value}"), + Some(value) => { + return plan_err!( + "Invalid escape character in SIMILAR TO expression. Expected a single character wrapped with single quotes, got {value}" + ); + } None => None, }; Ok(Expr::SimilarTo(Like::new( @@ -1206,8 +1214,8 @@ mod tests { use sqlparser::dialect::GenericDialect; use sqlparser::parser::Parser; - use datafusion_common::config::ConfigOptions; use datafusion_common::TableReference; + use datafusion_common::config::ConfigOptions; use datafusion_expr::logical_plan::builder::LogicalTableSource; use datafusion_expr::{AggregateUDF, ScalarUDF, TableSource, WindowUDF}; diff --git a/datafusion/sql/src/expr/order_by.rs b/datafusion/sql/src/expr/order_by.rs index 79ebc5943ffb..faecfbcfecc0 100644 --- a/datafusion/sql/src/expr/order_by.rs +++ b/datafusion/sql/src/expr/order_by.rs @@ -17,7 +17,7 @@ use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use datafusion_common::{ - not_impl_err, plan_datafusion_err, plan_err, Column, DFSchema, Result, + Column, DFSchema, Result, not_impl_err, plan_datafusion_err, plan_err, }; use datafusion_expr::expr::Sort; use datafusion_expr::{Expr, SortExpr}; diff --git a/datafusion/sql/src/expr/subquery.rs b/datafusion/sql/src/expr/subquery.rs index 4bca6f7e49ba..ec34ff3d5342 100644 --- a/datafusion/sql/src/expr/subquery.rs +++ b/datafusion/sql/src/expr/subquery.rs @@ -16,7 +16,7 @@ // under the License. use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; -use datafusion_common::{plan_err, DFSchema, Diagnostic, Result, Span, Spans}; +use datafusion_common::{DFSchema, Diagnostic, Result, Span, Spans, plan_err}; use datafusion_expr::expr::{Exists, InSubquery}; use datafusion_expr::{Expr, LogicalPlan, Subquery}; use sqlparser::ast::Expr as SQLExpr; @@ -60,10 +60,10 @@ impl SqlToRel<'_, S> { let mut spans = Spans::new(); if let SetExpr::Select(select) = &subquery.body.as_ref() { for item in &select.projection { - if let SelectItem::UnnamedExpr(SQLExpr::Identifier(ident)) = item { - if let Some(span) = Span::try_from_sqlparser_span(ident.span) { - spans.add_span(span); - } + if let SelectItem::UnnamedExpr(SQLExpr::Identifier(ident)) = item + && let Some(span) = Span::try_from_sqlparser_span(ident.span) + { + spans.add_span(span); } } } @@ -103,10 +103,10 @@ impl SqlToRel<'_, S> { let mut spans = Spans::new(); if let SetExpr::Select(select) = subquery.body.as_ref() { for item in &select.projection { - if let SelectItem::ExprWithAlias { alias, .. } = item { - if let Some(span) = Span::try_from_sqlparser_span(alias.span) { - spans.add_span(span); - } + if let SelectItem::ExprWithAlias { alias, .. } = item + && let Some(span) = Span::try_from_sqlparser_span(alias.span) + { + spans.add_span(span); } } } diff --git a/datafusion/sql/src/expr/substring.rs b/datafusion/sql/src/expr/substring.rs index 0ff361be0e20..d3b56097c1f5 100644 --- a/datafusion/sql/src/expr/substring.rs +++ b/datafusion/sql/src/expr/substring.rs @@ -16,9 +16,9 @@ // under the License. use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; -use datafusion_common::{not_impl_err, plan_err}; use datafusion_common::{DFSchema, Result, ScalarValue}; -use datafusion_expr::{planner::PlannerResult, Expr}; +use datafusion_common::{not_impl_err, plan_err}; +use datafusion_expr::{Expr, planner::PlannerResult}; use sqlparser::ast::Expr as SQLExpr; @@ -79,7 +79,9 @@ impl SqlToRel<'_, S> { } } - not_impl_err!("Substring could not be planned by registered expr planner. \ - Hint: Please try with `unicode_expressions` DataFusion feature enabled") + not_impl_err!( + "Substring could not be planned by registered expr planner. \ + Hint: Please try with `unicode_expressions` DataFusion feature enabled" + ) } } diff --git a/datafusion/sql/src/expr/unary_op.rs b/datafusion/sql/src/expr/unary_op.rs index f63140230b60..cd118c0fdd5c 100644 --- a/datafusion/sql/src/expr/unary_op.rs +++ b/datafusion/sql/src/expr/unary_op.rs @@ -16,10 +16,10 @@ // under the License. use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; -use datafusion_common::{not_impl_err, plan_err, DFSchema, Diagnostic, Result}; +use datafusion_common::{DFSchema, Diagnostic, Result, not_impl_err, plan_err}; use datafusion_expr::{ - type_coercion::{is_interval, is_timestamp}, Expr, ExprSchemable, + type_coercion::{is_interval, is_timestamp}, }; use sqlparser::ast::{Expr as SQLExpr, UnaryOperator, Value, ValueWithSpan}; diff --git a/datafusion/sql/src/expr/value.rs b/datafusion/sql/src/expr/value.rs index 8ca059d08c16..ad057ba4c6e4 100644 --- a/datafusion/sql/src/expr/value.rs +++ b/datafusion/sql/src/expr/value.rs @@ -17,20 +17,20 @@ use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use arrow::compute::kernels::cast_utils::{ - parse_interval_month_day_nano_config, IntervalParseConfig, IntervalUnit, + IntervalParseConfig, IntervalUnit, parse_interval_month_day_nano_config, }; use arrow::datatypes::{ - i256, FieldRef, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION, + DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION, FieldRef, i256, }; use bigdecimal::num_bigint::BigInt; use bigdecimal::{BigDecimal, Signed, ToPrimitive}; use datafusion_common::{ - internal_datafusion_err, not_impl_err, plan_err, DFSchema, DataFusionError, Result, - ScalarValue, + DFSchema, DataFusionError, Result, ScalarValue, internal_datafusion_err, + not_impl_err, plan_err, }; use datafusion_expr::expr::{BinaryExpr, Placeholder}; use datafusion_expr::planner::PlannerResult; -use datafusion_expr::{lit, Expr, Operator}; +use datafusion_expr::{Expr, Operator, lit}; use log::debug; use sqlparser::ast::{ BinaryOperator, Expr as SQLExpr, Interval, UnaryOperator, Value, ValueWithSpan, @@ -86,10 +86,8 @@ impl SqlToRel<'_, S> { return Ok(lit(n)); } - if !negative { - if let Ok(n) = unsigned_number.parse::() { - return Ok(lit(n)); - } + if !negative && let Ok(n) = unsigned_number.parse::() { + return Ok(lit(n)); } if self.options.parse_float_as_decimal { @@ -181,7 +179,9 @@ impl SqlToRel<'_, S> { } } - not_impl_err!("Could not plan array literal. Hint: Please try with `nested_expressions` DataFusion feature enabled") + not_impl_err!( + "Could not plan array literal. Hint: Please try with `nested_expressions` DataFusion feature enabled" + ) } /// Convert a SQL interval expression to a DataFusion logical plan @@ -294,14 +294,12 @@ fn interval_literal(interval_value: SQLExpr, negative: bool) -> Result { interval_literal(*expr, negative)? } _ => { - return not_impl_err!("Unsupported interval argument. Expected string literal or number, got: {interval_value:?}"); + return not_impl_err!( + "Unsupported interval argument. Expected string literal or number, got: {interval_value:?}" + ); } }; - if negative { - Ok(format!("-{s}")) - } else { - Ok(s) - } + if negative { Ok(format!("-{s}")) } else { Ok(s) } } /// Try to decode bytes from hex literal string. @@ -504,9 +502,7 @@ mod tests { // scale < i8::MIN assert_eq!( - parse_decimal("1e129", false) - .unwrap_err() - .strip_backtrace(), + parse_decimal("1e129", false).unwrap_err().strip_backtrace(), "This feature is not implemented: Decimal scale -129 exceeds the minimum supported scale: -128" ); diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index e3622bcaf284..27db2b0f9757 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -20,9 +20,9 @@ //! This parser implements DataFusion specific statements such as //! `CREATE EXTERNAL TABLE` -use datafusion_common::config::SqlParserOptions; use datafusion_common::DataFusionError; -use datafusion_common::{sql_err, Diagnostic, Span}; +use datafusion_common::config::SqlParserOptions; +use datafusion_common::{Diagnostic, Span, sql_err}; use sqlparser::ast::{ExprWithAlias, Ident, OrderByOptions}; use sqlparser::tokenizer::TokenWithSpan; use sqlparser::{ @@ -30,7 +30,7 @@ use sqlparser::{ ColumnDef, ColumnOptionDef, ObjectName, OrderByExpr, Query, Statement as SQLStatement, TableConstraint, Value, }, - dialect::{keywords::Keyword, Dialect, GenericDialect}, + dialect::{Dialect, GenericDialect, keywords::Keyword}, parser::{Parser, ParserError}, tokenizer::{Token, Tokenizer, Word}, }; @@ -640,7 +640,9 @@ impl<'a> DFParser<'a> { Keyword::WITH => { self.parser.expect_keyword(Keyword::HEADER)?; self.parser.expect_keyword(Keyword::ROW)?; - return parser_err!("WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS ('format.has_header' 'true')")?; + return parser_err!( + "WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS ('format.has_header' 'true')" + )?; } Keyword::PARTITIONED => { self.parser.expect_keyword(Keyword::BY)?; @@ -1024,15 +1026,21 @@ impl<'a> DFParser<'a> { } else { self.parser.expect_keyword(Keyword::HEADER)?; self.parser.expect_keyword(Keyword::ROW)?; - return parser_err!("WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS (format.has_header true)")?; + return parser_err!( + "WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS (format.has_header true)" + )?; } } Keyword::DELIMITER => { - return parser_err!("DELIMITER clause is no longer in use. Please use the OPTIONS clause with 'format.delimiter' set appropriately, e.g., OPTIONS (format.delimiter ',')")?; + return parser_err!( + "DELIMITER clause is no longer in use. Please use the OPTIONS clause with 'format.delimiter' set appropriately, e.g., OPTIONS (format.delimiter ',')" + )?; } Keyword::COMPRESSION => { self.parser.expect_keyword(Keyword::TYPE)?; - return parser_err!("COMPRESSION TYPE clause is no longer in use. Please use the OPTIONS clause with 'format.compression' set appropriately, e.g., OPTIONS (format.compression gzip)")?; + return parser_err!( + "COMPRESSION TYPE clause is no longer in use. Please use the OPTIONS clause with 'format.compression' set appropriately, e.g., OPTIONS (format.compression gzip)" + )?; } Keyword::PARTITIONED => { self.parser.expect_keyword(Keyword::BY)?; @@ -1385,8 +1393,7 @@ mod tests { expect_parse_ok(sql, expected)?; // positive case: it is ok for avro files not to have columns specified - let sql = - "CREATE EXTERNAL TABLE IF NOT EXISTS t STORED AS PARQUET LOCATION 'foo.parquet'"; + let sql = "CREATE EXTERNAL TABLE IF NOT EXISTS t STORED AS PARQUET LOCATION 'foo.parquet'"; let expected = Statement::CreateExternalTable(CreateExternalTable { name: name.clone(), columns: vec![], @@ -1423,8 +1430,7 @@ mod tests { expect_parse_ok(sql, expected)?; // positive case: column definition allowed in 'partition by' clause - let sql = - "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int) LOCATION 'foo.csv'"; + let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int) LOCATION 'foo.csv'"; let expected = Statement::CreateExternalTable(CreateExternalTable { name: name.clone(), columns: vec![ @@ -1445,17 +1451,18 @@ mod tests { expect_parse_ok(sql, expected)?; // negative case: mixed column defs and column names in `PARTITIONED BY` clause - let sql = - "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int, c1) LOCATION 'foo.csv'"; + let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int, c1) LOCATION 'foo.csv'"; expect_parse_error( sql, "SQL error: ParserError(\"Expected: a data type name, found: ) at Line: 1, Column: 73\")", ); // negative case: mixed column defs and column names in `PARTITIONED BY` clause - let sql = - "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (c1, p1 int) LOCATION 'foo.csv'"; - expect_parse_error(sql, "SQL error: ParserError(\"Expected: ',' or ')' after partition definition, found: int at Line: 1, Column: 70\")"); + let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (c1, p1 int) LOCATION 'foo.csv'"; + expect_parse_error( + sql, + "SQL error: ParserError(\"Expected: ',' or ')' after partition definition, found: int at Line: 1, Column: 70\")", + ); // positive case: additional options (one entry) can be specified let sql = @@ -1477,8 +1484,7 @@ mod tests { expect_parse_ok(sql, expected)?; // positive case: additional options (multiple entries) can be specified - let sql = - "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1', k2 v2) LOCATION 'blahblah'"; + let sql = "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1', k2 v2) LOCATION 'blahblah'"; let expected = Statement::CreateExternalTable(CreateExternalTable { name: name.clone(), columns: vec![], @@ -1499,15 +1505,17 @@ mod tests { expect_parse_ok(sql, expected)?; // Ordered Col - let sqls = ["CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1) LOCATION 'foo.csv'", - "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS FIRST) LOCATION 'foo.csv'", - "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS LAST) LOCATION 'foo.csv'", - "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC) LOCATION 'foo.csv'", - "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC) LOCATION 'foo.csv'", - "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC NULLS FIRST) LOCATION 'foo.csv'", - "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC NULLS LAST) LOCATION 'foo.csv'", - "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC NULLS FIRST) LOCATION 'foo.csv'", - "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC NULLS LAST) LOCATION 'foo.csv'"]; + let sqls = [ + "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1) LOCATION 'foo.csv'", + "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS FIRST) LOCATION 'foo.csv'", + "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS LAST) LOCATION 'foo.csv'", + "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC) LOCATION 'foo.csv'", + "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC) LOCATION 'foo.csv'", + "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC NULLS FIRST) LOCATION 'foo.csv'", + "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC NULLS LAST) LOCATION 'foo.csv'", + "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC NULLS FIRST) LOCATION 'foo.csv'", + "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC NULLS LAST) LOCATION 'foo.csv'", + ]; let expected = vec![ (None, None), (None, Some(true)), @@ -1918,8 +1926,7 @@ mod tests { #[test] fn copy_to_multi_options() -> Result<(), DataFusionError> { // order of options is preserved - let sql = - "COPY foo TO bar STORED AS parquet OPTIONS ('format.row_group_size' 55, 'format.compression' snappy, 'execution.keep_partition_by_columns' true)"; + let sql = "COPY foo TO bar STORED AS parquet OPTIONS ('format.row_group_size' 55, 'format.compression' snappy, 'execution.keep_partition_by_columns' true)"; let expected_options = vec![ ( diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index eb1e711eb4fd..eb798b71e455 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -23,19 +23,19 @@ use std::vec; use crate::utils::make_decimal_type; use arrow::datatypes::*; +use datafusion_common::TableReference; use datafusion_common::config::SqlParserOptions; use datafusion_common::datatype::{DataTypeExt, FieldExt}; use datafusion_common::error::add_possible_columns_to_diag; -use datafusion_common::TableReference; +use datafusion_common::{DFSchema, DataFusionError, Result, not_impl_err, plan_err}; use datafusion_common::{ - field_not_found, internal_err, plan_datafusion_err, DFSchemaRef, Diagnostic, - SchemaError, + DFSchemaRef, Diagnostic, SchemaError, field_not_found, internal_err, + plan_datafusion_err, }; -use datafusion_common::{not_impl_err, plan_err, DFSchema, DataFusionError, Result}; use datafusion_expr::logical_plan::{LogicalPlan, LogicalPlanBuilder}; pub use datafusion_expr::planner::ContextProvider; use datafusion_expr::utils::find_column_exprs; -use datafusion_expr::{col, Expr}; +use datafusion_expr::{Expr, col}; use sqlparser::ast::{ArrayElemTypeDef, ExactNumberInfo, TimezoneInfo}; use sqlparser::ast::{ColumnDef as SQLColumnDef, ColumnOption}; use sqlparser::ast::{DataType as SQLDataType, Ident, ObjectName, TableAlias}; @@ -202,7 +202,9 @@ impl FromStr for NullOrdering { "nulls_min" => Ok(Self::NullsMin), "nulls_first" => Ok(Self::NullsFirst), "nulls_last" => Ok(Self::NullsLast), - _ => plan_err!("Unknown null ordering: Expected one of 'nulls_first', 'nulls_last', 'nulls_min' or 'nulls_max'. Got {s}"), + _ => plan_err!( + "Unknown null ordering: Expected one of 'nulls_first', 'nulls_last', 'nulls_min' or 'nulls_max'. Got {s}" + ), } } } @@ -593,10 +595,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { sql_type: &SQLDataType, ) -> Result { // First check if any of the registered type_planner can handle this type - if let Some(type_planner) = self.context_provider.get_type_planner() { - if let Some(data_type) = type_planner.plan_type(sql_type)? { - return Ok(data_type.into_nullable_field_ref()); - } + if let Some(type_planner) = self.context_provider.get_type_planner() + && let Some(data_type) = type_planner.plan_type(sql_type)? + { + return Ok(data_type.into_nullable_field_ref()); } // If no type_planner can handle this type, use the default conversion diff --git a/datafusion/sql/src/query.rs b/datafusion/sql/src/query.rs index 6a9267a9a580..eba48a2401c3 100644 --- a/datafusion/sql/src/query.rs +++ b/datafusion/sql/src/query.rs @@ -20,7 +20,7 @@ use std::sync::Arc; use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use crate::stack::StackGuard; -use datafusion_common::{not_impl_err, Constraints, DFSchema, Result}; +use datafusion_common::{Constraints, DFSchema, Result, not_impl_err}; use datafusion_expr::expr::{Sort, WildcardOptions}; use datafusion_expr::select_expr::SelectExpr; diff --git a/datafusion/sql/src/relation/join.rs b/datafusion/sql/src/relation/join.rs index 754ded1514a6..8e1a8817309f 100644 --- a/datafusion/sql/src/relation/join.rs +++ b/datafusion/sql/src/relation/join.rs @@ -16,7 +16,7 @@ // under the License. use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; -use datafusion_common::{not_impl_err, plan_datafusion_err, Column, Result}; +use datafusion_common::{Column, Result, not_impl_err, plan_datafusion_err}; use datafusion_expr::{JoinType, LogicalPlan, LogicalPlanBuilder}; use sqlparser::ast::{ Join, JoinConstraint, JoinOperator, ObjectName, TableFactor, TableWithJoins, diff --git a/datafusion/sql/src/relation/mod.rs b/datafusion/sql/src/relation/mod.rs index 6a06b2d52e7a..4ad37435115d 100644 --- a/datafusion/sql/src/relation/mod.rs +++ b/datafusion/sql/src/relation/mod.rs @@ -21,13 +21,13 @@ use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::{ - not_impl_err, plan_err, DFSchema, Diagnostic, Result, Span, Spans, TableReference, + DFSchema, Diagnostic, Result, Span, Spans, TableReference, not_impl_err, plan_err, }; use datafusion_expr::builder::subquery_alias; use datafusion_expr::planner::{ PlannedRelation, RelationPlannerContext, RelationPlanning, }; -use datafusion_expr::{expr::Unnest, Expr, LogicalPlan, LogicalPlanBuilder}; +use datafusion_expr::{Expr, LogicalPlan, LogicalPlanBuilder, expr::Unnest}; use datafusion_expr::{Subquery, SubqueryAlias}; use sqlparser::ast::{FunctionArg, FunctionArgExpr, Spanned, TableFactor}; @@ -360,7 +360,8 @@ fn optimize_subquery_sort(plan: LogicalPlan) -> Result> // 2. RANK / ROW_NUMBER ... => Handled by a `WindowAggr` and its requirements. // 3. LIMIT => Handled by a `Sort`, so we need to search for it. let mut has_limit = false; - let new_plan = plan.transform_down(|c| { + + plan.transform_down(|c| { if let LogicalPlan::Limit(_) = c { has_limit = true; return Ok(Transformed::no(c)); @@ -375,6 +376,5 @@ fn optimize_subquery_sort(plan: LogicalPlan) -> Result> } _ => Ok(Transformed::no(c)), } - }); - new_plan + }) } diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index fa9337a30adb..c5a635cac956 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -22,15 +22,15 @@ use std::sync::Arc; use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use crate::query::to_order_by_exprs_with_select; use crate::utils::{ + CheckColumnsMustReferenceAggregatePurpose, CheckColumnsSatisfyExprsPurpose, check_columns_satisfy_exprs, extract_aliases, rebase_expr, resolve_aliases_to_exprs, resolve_columns, resolve_positions_to_exprs, rewrite_recursive_unnests_bottom_up, - CheckColumnsMustReferenceAggregatePurpose, CheckColumnsSatisfyExprsPurpose, }; use datafusion_common::error::DataFusionErrorBuilder; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; -use datafusion_common::{not_impl_err, plan_err, Result}; use datafusion_common::{RecursionUnnestOption, UnnestOptions}; +use datafusion_common::{Result, not_impl_err, plan_err}; use datafusion_expr::expr::{Alias, PlannedReplaceSelectItem, WildcardOptions}; use datafusion_expr::expr_rewriter::{ normalize_col, normalize_col_with_schemas_and_ambiguity_check, normalize_sorts, @@ -46,8 +46,9 @@ use datafusion_expr::{ use indexmap::IndexMap; use sqlparser::ast::{ - visit_expressions_mut, Distinct, Expr as SQLExpr, GroupByExpr, NamedWindowExpr, - OrderBy, SelectItemQualifiedWildcardKind, WildcardAdditionalOptions, WindowType, + Distinct, Expr as SQLExpr, GroupByExpr, NamedWindowExpr, OrderBy, + SelectItemQualifiedWildcardKind, WildcardAdditionalOptions, WindowType, + visit_expressions_mut, }; use sqlparser::ast::{NamedWindowDefinition, Select, SelectItem, TableWithJoins}; @@ -273,14 +274,18 @@ impl SqlToRel<'_, S> { )? } else { match having_expr_opt { - Some(having_expr) => return plan_err!("HAVING clause references: {having_expr} must appear in the GROUP BY clause or be used in an aggregate function"), + Some(having_expr) => { + return plan_err!( + "HAVING clause references: {having_expr} must appear in the GROUP BY clause or be used in an aggregate function" + ); + } None => AggregatePlanResult { plan: base_plan.clone(), select_exprs: select_exprs.clone(), having_expr: having_expr_opt, qualify_expr: qualify_expr_opt, order_by_exprs: order_by_rex, - } + }, } }; @@ -364,7 +369,9 @@ impl SqlToRel<'_, S> { || !group_by_exprs.is_empty() || !window_func_exprs.is_empty() { - return not_impl_err!("DISTINCT ON expressions with GROUP BY, aggregation or window functions are not supported "); + return not_impl_err!( + "DISTINCT ON expressions with GROUP BY, aggregation or window functions are not supported " + ); } let on_expr = on_expr @@ -780,7 +787,7 @@ impl SqlToRel<'_, S> { SelectItemQualifiedWildcardKind::Expr(_) => { return plan_err!( "Qualified wildcard with expression not supported" - ) + ); } }; let qualifier = self.object_name_to_table_reference(object_name)?; @@ -1043,11 +1050,11 @@ impl SqlToRel<'_, S> { .iter() .find_map(|select_expr| { // Only consider aliased expressions - if let Expr::Alias(alias) = select_expr { - if alias.expr.as_ref() == &rewritten_expr { - // Use the alias name - return Some(Expr::Column(alias.name.clone().into())); - } + if let Expr::Alias(alias) = select_expr + && alias.expr.as_ref() == &rewritten_expr + { + // Use the alias name + return Some(Expr::Column(alias.name.clone().into())); } None }) @@ -1105,33 +1112,32 @@ impl SqlToRel<'_, S> { { let mut err = None; let _ = visit_expressions_mut(expr, |expr| { - if let SQLExpr::Function(f) = expr { - if let Some(WindowType::NamedWindow(ident)) = &f.over { - let normalized_ident = - self.ident_normalizer.normalize(ident.clone()); - for ( - NamedWindowDefinition(_, window_expr), - normalized_window_ident, - ) in named_windows.iter() - { - if normalized_ident.eq(normalized_window_ident) { - f.over = Some(match window_expr { - NamedWindowExpr::NamedWindow(ident) => { - WindowType::NamedWindow(ident.clone()) - } - NamedWindowExpr::WindowSpec(spec) => { - WindowType::WindowSpec(spec.clone()) - } - }) - } - } - // All named windows must be defined with a WindowSpec. - if let Some(WindowType::NamedWindow(ident)) = &f.over { - err = - Some(plan_err!("The window {ident} is not defined!")); - return ControlFlow::Break(()); + if let SQLExpr::Function(f) = expr + && let Some(WindowType::NamedWindow(ident)) = &f.over + { + let normalized_ident = + self.ident_normalizer.normalize(ident.clone()); + for ( + NamedWindowDefinition(_, window_expr), + normalized_window_ident, + ) in named_windows.iter() + { + if normalized_ident.eq(normalized_window_ident) { + f.over = Some(match window_expr { + NamedWindowExpr::NamedWindow(ident) => { + WindowType::NamedWindow(ident.clone()) + } + NamedWindowExpr::WindowSpec(spec) => { + WindowType::WindowSpec(spec.clone()) + } + }) } } + // All named windows must be defined with a WindowSpec. + if let Some(WindowType::NamedWindow(ident)) = &f.over { + err = Some(plan_err!("The window {ident} is not defined!")); + return ControlFlow::Break(()); + } } ControlFlow::Continue(()) }); diff --git a/datafusion/sql/src/set_expr.rs b/datafusion/sql/src/set_expr.rs index 5b65e1c045bd..d4e771cb4858 100644 --- a/datafusion/sql/src/set_expr.rs +++ b/datafusion/sql/src/set_expr.rs @@ -17,7 +17,7 @@ use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use datafusion_common::{ - not_impl_err, plan_err, DataFusionError, Diagnostic, Result, Span, + DataFusionError, Diagnostic, Result, Span, not_impl_err, plan_err, }; use datafusion_expr::{LogicalPlan, LogicalPlanBuilder}; use sqlparser::ast::{SetExpr, SetOperator, SetQuantifier, Spanned}; diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index da1e7819f7ef..1acbcc92dfe1 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -25,7 +25,7 @@ use crate::parser::{ LexOrdering, ResetStatement, Statement as DFStatement, }; use crate::planner::{ - object_name_to_qualifier, ContextProvider, PlannerContext, SqlToRel, + ContextProvider, PlannerContext, SqlToRel, object_name_to_qualifier, }; use crate::utils::normalize_ident; @@ -33,18 +33,18 @@ use arrow::datatypes::{Field, FieldRef, Fields}; use datafusion_common::error::_plan_err; use datafusion_common::parsers::CompressionTypeVariant; use datafusion_common::{ - exec_err, internal_err, not_impl_err, plan_datafusion_err, plan_err, schema_err, - unqualified_field_not_found, Column, Constraint, Constraints, DFSchema, DFSchemaRef, - DataFusionError, Result, ScalarValue, SchemaError, SchemaReference, TableReference, - ToDFSchema, + Column, Constraint, Constraints, DFSchema, DFSchemaRef, DataFusionError, Result, + ScalarValue, SchemaError, SchemaReference, TableReference, ToDFSchema, exec_err, + internal_err, not_impl_err, plan_datafusion_err, plan_err, schema_err, + unqualified_field_not_found, }; use datafusion_expr::dml::{CopyTo, InsertOp}; use datafusion_expr::expr_rewriter::normalize_col_with_schemas_and_ambiguity_check; -use datafusion_expr::logical_plan::builder::project; use datafusion_expr::logical_plan::DdlStatement; +use datafusion_expr::logical_plan::builder::project; use datafusion_expr::utils::expr_to_columns; use datafusion_expr::{ - cast, col, Analyze, CreateCatalog, CreateCatalogSchema, + Analyze, CreateCatalog, CreateCatalogSchema, CreateExternalTable as PlanCreateExternalTable, CreateFunction, CreateFunctionBody, CreateIndex as PlanCreateIndex, CreateMemoryTable, CreateView, Deallocate, DescribeTable, DmlStatement, DropCatalogSchema, DropFunction, DropTable, DropView, @@ -52,7 +52,7 @@ use datafusion_expr::{ LogicalPlan, LogicalPlanBuilder, OperateFunctionArg, PlanType, Prepare, ResetVariable, SetVariable, SortExpr, Statement as PlanStatement, ToStringifiedPlan, TransactionAccessMode, TransactionConclusion, TransactionEnd, - TransactionIsolationLevel, TransactionStart, Volatility, WriteOp, + TransactionIsolationLevel, TransactionStart, Volatility, WriteOp, cast, col, }; use sqlparser::ast::{ self, BeginTransactionKind, IndexColumn, IndexType, NullsDistinctOption, OrderByExpr, @@ -717,18 +717,31 @@ impl SqlToRel<'_, S> { } ObjectType::Schema => { let name = match name { - TableReference::Bare { table } => Ok(SchemaReference::Bare { schema: table }), - TableReference::Partial { schema, table } => Ok(SchemaReference::Full { schema: table, catalog: schema }), - TableReference::Full { catalog: _, schema: _, table: _ } => { - Err(ParserError("Invalid schema specifier (has 3 parts)".to_string())) + TableReference::Bare { table } => { + Ok(SchemaReference::Bare { schema: table }) } + TableReference::Partial { schema, table } => { + Ok(SchemaReference::Full { + schema: table, + catalog: schema, + }) + } + TableReference::Full { + catalog: _, + schema: _, + table: _, + } => Err(ParserError( + "Invalid schema specifier (has 3 parts)".to_string(), + )), }?; - Ok(LogicalPlan::Ddl(DdlStatement::DropCatalogSchema(DropCatalogSchema { - name, - if_exists, - cascade, - schema: DFSchemaRef::new(DFSchema::empty()), - }))) + Ok(LogicalPlan::Ddl(DdlStatement::DropCatalogSchema( + DropCatalogSchema { + name, + if_exists, + cascade, + schema: DFSchemaRef::new(DFSchema::empty()), + }, + ))) } _ => not_impl_err!( "Only `DROP TABLE/VIEW/SCHEMA ...` statement is supported currently" @@ -956,7 +969,9 @@ impl SqlToRel<'_, S> { let table_name = match table { TableObject::TableName(table_name) => table_name, TableObject::TableFunction(_) => { - return not_impl_err!("INSERT INTO Table functions not supported") + return not_impl_err!( + "INSERT INTO Table functions not supported" + ); } }; if let Some(or) = or { @@ -1238,12 +1253,11 @@ impl SqlToRel<'_, S> { }; if let (Some(pos_default), Some(pos_non_default)) = (first_default, last_non_default) + && pos_non_default > pos_default { - if pos_non_default > pos_default { - return plan_err!( - "Non-default arguments cannot follow default arguments." - ); - } + return plan_err!( + "Non-default arguments cannot follow default arguments." + ); } // At the moment functions can't be qualified `schema.name` let name = match &name.0[..] { @@ -1270,7 +1284,9 @@ impl SqlToRel<'_, S> { let count_positional = fields.iter().filter(|f| f.name() == "").count(); if !(count_positional == 0 || count_positional == fields.len()) { - return plan_err!("All function arguments must use either named or positional style."); + return plan_err!( + "All function arguments must use either named or positional style." + ); } } let mut planner_context = PlannerContext::new() @@ -2235,7 +2251,9 @@ impl SqlToRel<'_, S> { (false, false) => InsertOp::Append, (true, false) => InsertOp::Overwrite, (false, true) => InsertOp::Replace, - (true, true) => plan_err!("Conflicting insert operations: `overwrite` and `replace_into` cannot both be true")?, + (true, true) => plan_err!( + "Conflicting insert operations: `overwrite` and `replace_into` cannot both be true" + )?, }; let plan = LogicalPlan::Dml(DmlStatement::new( diff --git a/datafusion/sql/src/unparser/ast.rs b/datafusion/sql/src/unparser/ast.rs index 2cf26009ac0f..87250f1646ea 100644 --- a/datafusion/sql/src/unparser/ast.rs +++ b/datafusion/sql/src/unparser/ast.rs @@ -20,7 +20,7 @@ use std::ops::ControlFlow; use sqlparser::ast::helpers::attached_token::AttachedToken; use sqlparser::ast::{ - self, visit_expressions_mut, LimitClause, OrderByKind, SelectFlavor, + self, LimitClause, OrderByKind, SelectFlavor, visit_expressions_mut, }; #[derive(Clone)] @@ -302,7 +302,7 @@ impl SelectBuilder { group_by: match self.group_by { Some(ref value) => value.clone(), None => { - return Err(Into::into(UninitializedFieldError::from("group_by"))) + return Err(Into::into(UninitializedFieldError::from("group_by"))); } }, cluster_by: self.cluster_by.clone(), @@ -581,7 +581,7 @@ impl DerivedRelationBuilder { subquery: match self.subquery { Some(ref value) => value.clone(), None => { - return Err(Into::into(UninitializedFieldError::from("subquery"))) + return Err(Into::into(UninitializedFieldError::from("subquery"))); } }, alias: self.alias.clone(), @@ -711,10 +711,10 @@ impl From for BuilderError { impl fmt::Display for BuilderError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - Self::UninitializedField(ref field) => { + Self::UninitializedField(field) => { write!(f, "`{field}` must be initialized") } - Self::ValidationError(ref error) => write!(f, "{error}"), + Self::ValidationError(error) => write!(f, "{error}"), } } } diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs index 834b0a97a47b..82202a405f69 100644 --- a/datafusion/sql/src/unparser/dialect.rs +++ b/datafusion/sql/src/unparser/dialect.rs @@ -18,8 +18,8 @@ use std::{collections::HashMap, sync::Arc}; use super::{ - utils::character_length_to_sql, utils::date_part_to_sql, - utils::sqlite_date_trunc_to_sql, utils::sqlite_from_unixtime_to_sql, Unparser, + Unparser, utils::character_length_to_sql, utils::date_part_to_sql, + utils::sqlite_date_trunc_to_sql, utils::sqlite_from_unixtime_to_sql, }; use arrow::array::timezone::Tz; use arrow::datatypes::TimeUnit; diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 62e1927ccfa1..3391408e04c1 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -25,27 +25,27 @@ use sqlparser::ast::{ use std::sync::Arc; use std::vec; -use super::dialect::IntervalStyle; use super::Unparser; +use super::dialect::IntervalStyle; use arrow::array::{ + ArrayRef, Date32Array, Date64Array, PrimitiveArray, types::{ ArrowTemporalType, Time32MillisecondType, Time32SecondType, Time64MicrosecondType, Time64NanosecondType, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, }, - ArrayRef, Date32Array, Date64Array, PrimitiveArray, }; use arrow::datatypes::{ - DataType, Decimal128Type, Decimal256Type, Decimal32Type, Decimal64Type, DecimalType, + DataType, Decimal32Type, Decimal64Type, Decimal128Type, Decimal256Type, DecimalType, }; use arrow::util::display::array_value_to_string; use datafusion_common::{ - assert_eq_or_internal_err, assert_or_internal_err, internal_datafusion_err, - internal_err, not_impl_err, plan_err, Column, Result, ScalarValue, + Column, Result, ScalarValue, assert_eq_or_internal_err, assert_or_internal_err, + internal_datafusion_err, internal_err, not_impl_err, plan_err, }; use datafusion_expr::{ - expr::{Alias, Exists, InList, ScalarFunction, Sort, WindowFunction}, Between, BinaryExpr, Case, Cast, Expr, GroupingSet, Like, Operator, TryCast, + expr::{Alias, Exists, InList, ScalarFunction, Sort, WindowFunction}, }; use sqlparser::ast::helpers::attached_token::AttachedToken; use sqlparser::tokenizer::Span; @@ -639,9 +639,9 @@ impl Unparser<'_> { Expr::Literal(lit, _) => self.new_ident_quoted_if_needs(lit.to_string()), _ => { return internal_err!( - "get_field expects second argument to be a string, but received: {:?}", - &args[1] - ) + "get_field expects second argument to be a string, but received: {:?}", + &args[1] + ); } }; @@ -650,7 +650,12 @@ impl Unparser<'_> { let mut id = match self.col_to_sql(col)? { ast::Expr::Identifier(ident) => vec![ident], ast::Expr::CompoundIdentifier(idents) => idents, - other => return internal_err!("expected col_to_sql to return an Identifier or CompoundIdentifier, but received: {:?}", other), + other => { + return internal_err!( + "expected col_to_sql to return an Identifier or CompoundIdentifier, but received: {:?}", + other + ); + } }; id.push(field); Ok(ast::Expr::CompoundIdentifier(id)) @@ -1070,7 +1075,7 @@ impl Unparser<'_> { return Err(internal_datafusion_err!( "Expected Timestamp, got {:?}", T::DATA_TYPE - )) + )); } }; @@ -1430,7 +1435,9 @@ impl Unparser<'_> { }; return Ok(ast::Expr::Interval(interval)); } else if months != 0 { - return not_impl_err!("Unsupported Interval scalar with both Month and DayTime for IntervalStyle::MySQL"); + return not_impl_err!( + "Unsupported Interval scalar with both Month and DayTime for IntervalStyle::MySQL" + ); } // DAY only @@ -1618,7 +1625,9 @@ impl Unparser<'_> { }; Ok(ast::Expr::Interval(interval)) } else { - not_impl_err!("Unsupported IntervalMonthDayNano scalar with both Month and DayTime for IntervalStyle::SQLStandard") + not_impl_err!( + "Unsupported IntervalMonthDayNano scalar with both Month and DayTime for IntervalStyle::SQLStandard" + ) } } _ => not_impl_err!( @@ -1787,12 +1796,12 @@ mod tests { use datafusion_common::{Spans, TableReference}; use datafusion_expr::expr::WildcardOptions; use datafusion_expr::{ - case, cast, col, cube, exists, grouping_set, interval_datetime_lit, - interval_year_month_lit, lit, not, not_exists, out_ref_col, placeholder, rollup, - table_scan, try_cast, when, ColumnarValue, ScalarFunctionArgs, ScalarUDF, - ScalarUDFImpl, Signature, Volatility, WindowFrame, WindowFunctionDefinition, + ColumnarValue, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, + Volatility, WindowFrame, WindowFunctionDefinition, case, cast, col, cube, exists, + grouping_set, interval_datetime_lit, interval_year_month_lit, lit, not, + not_exists, out_ref_col, placeholder, rollup, table_scan, try_cast, when, }; - use datafusion_expr::{interval_month_day_nano_lit, ExprFunctionExt}; + use datafusion_expr::{ExprFunctionExt, interval_month_day_nano_lit}; use datafusion_functions::datetime::from_unixtime::FromUnixtimeFunc; use datafusion_functions::expr_fn::{get_field, named_struct}; use datafusion_functions_aggregate::count::count_udaf; diff --git a/datafusion/sql/src/unparser/extension_unparser.rs b/datafusion/sql/src/unparser/extension_unparser.rs index b778130ca5a2..f38cd1db639e 100644 --- a/datafusion/sql/src/unparser/extension_unparser.rs +++ b/datafusion/sql/src/unparser/extension_unparser.rs @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -use crate::unparser::ast::{QueryBuilder, RelationBuilder, SelectBuilder}; use crate::unparser::Unparser; +use crate::unparser::ast::{QueryBuilder, RelationBuilder, SelectBuilder}; use datafusion_expr::UserDefinedLogicalNode; use sqlparser::ast::Statement; diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index 074ae994e6f5..61a06d9c3837 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -16,21 +16,21 @@ // under the License. use super::{ + Unparser, ast::{ BuilderError, DerivedRelationBuilder, QueryBuilder, RelationBuilder, SelectBuilder, TableRelationBuilder, TableWithJoinsBuilder, }, rewrite::{ - inject_column_aliases_into_subquery, normalize_union_schema, + TableAliasRewriter, inject_column_aliases_into_subquery, normalize_union_schema, rewrite_plan_for_sort_on_non_projected_fields, - subquery_alias_inner_query_and_columns, TableAliasRewriter, + subquery_alias_inner_query_and_columns, }, utils::{ find_agg_node_within_select, find_unnest_node_within_select, find_window_nodes_within_select, try_transform_to_simple_table_scan_with_filters, unproject_sort_expr, unproject_unnest_expr, unproject_window_exprs, }, - Unparser, }; use crate::unparser::extension_unparser::{ UnparseToStatementResult, UnparseWithinStatementResult, @@ -39,15 +39,15 @@ use crate::unparser::utils::{find_unnest_node_until_relation, unproject_agg_expr use crate::unparser::{ast::UnnestRelationBuilder, rewrite::rewrite_qualify}; use crate::utils::UNNEST_PLACEHOLDER; use datafusion_common::{ - assert_or_internal_err, internal_err, not_impl_err, + Column, DataFusionError, Result, ScalarValue, TableReference, assert_or_internal_err, + internal_err, not_impl_err, tree_node::{TransformedResult, TreeNode}, - Column, DataFusionError, Result, ScalarValue, TableReference, }; use datafusion_expr::expr::OUTER_REFERENCE_COLUMN_PREFIX; use datafusion_expr::{ - expr::Alias, BinaryExpr, Distinct, Expr, JoinConstraint, JoinType, LogicalPlan, + BinaryExpr, Distinct, Expr, JoinConstraint, JoinType, LogicalPlan, LogicalPlanBuilder, Operator, Projection, SortExpr, TableScan, Unnest, - UserDefinedLogicalNode, + UserDefinedLogicalNode, expr::Alias, }; use sqlparser::ast::{self, Ident, OrderByKind, SetExpr, TableAliasColumnDef}; use std::{sync::Arc, vec}; @@ -384,20 +384,19 @@ impl Unparser<'_> { } else { None }; - if self.dialect.unnest_as_table_factor() && unnest_input_type.is_some() { - if let LogicalPlan::Unnest(unnest) = &p.input.as_ref() { - if let Some(unnest_relation) = - self.try_unnest_to_table_factor_sql(unnest)? - { - relation.unnest(unnest_relation); - return self.select_to_sql_recursively( - p.input.as_ref(), - query, - select, - relation, - ); - } - } + if self.dialect.unnest_as_table_factor() + && unnest_input_type.is_some() + && let LogicalPlan::Unnest(unnest) = &p.input.as_ref() + && let Some(unnest_relation) = + self.try_unnest_to_table_factor_sql(unnest)? + { + relation.unnest(unnest_relation); + return self.select_to_sql_recursively( + p.input.as_ref(), + query, + select, + relation, + ); } // If it's a unnest projection, we should provide the table column alias @@ -585,18 +584,17 @@ impl Unparser<'_> { // If this distinct is the parent of a Union and we're in a query context, // then we need to unparse as a `UNION` rather than a `UNION ALL`. - if let Distinct::All(input) = distinct { - if matches!(input.as_ref(), LogicalPlan::Union(_)) { - if let Some(query_mut) = query.as_mut() { - query_mut.distinct_union(); - return self.select_to_sql_recursively( - input.as_ref(), - query, - select, - relation, - ); - } - } + if let Distinct::All(input) = distinct + && matches!(input.as_ref(), LogicalPlan::Union(_)) + && let Some(query_mut) = query.as_mut() + { + query_mut.distinct_union(); + return self.select_to_sql_recursively( + input.as_ref(), + query, + select, + relation, + ); } let (select_distinct, input) = match distinct { @@ -847,7 +845,7 @@ impl Unparser<'_> { Err(e) => { return internal_err!( "Failed to transform SubqueryAlias plan: {e}" - ) + ); } }; @@ -1015,15 +1013,14 @@ impl Unparser<'_> { /// /// `outer_ref` is the display result of [Expr::OuterReferenceColumn] fn check_unnest_placeholder_with_outer_ref(expr: &Expr) -> Option { - if let Expr::Alias(Alias { expr, .. }) = expr { - if let Expr::Column(Column { name, .. }) = expr.as_ref() { - if let Some(prefix) = name.strip_prefix(UNNEST_PLACEHOLDER) { - if prefix.starts_with(&format!("({OUTER_REFERENCE_COLUMN_PREFIX}(")) { - return Some(UnnestInputType::OuterReference); - } - return Some(UnnestInputType::Scalar); - } + if let Expr::Alias(Alias { expr, .. }) = expr + && let Expr::Column(Column { name, .. }) = expr.as_ref() + && let Some(prefix) = name.strip_prefix(UNNEST_PLACEHOLDER) + { + if prefix.starts_with(&format!("({OUTER_REFERENCE_COLUMN_PREFIX}(")) { + return Some(UnnestInputType::OuterReference); } + return Some(UnnestInputType::Scalar); } None } @@ -1091,42 +1088,40 @@ impl Unparser<'_> { // // Example: // select t1.c1 from t1 where t1.c1 > 1 -> select a.c1 from t1 as a where a.c1 > 1 - if let Some(ref alias) = alias { - if table_scan.projection.is_some() || !table_scan.filters.is_empty() { - builder = builder.alias(alias.clone())?; - } + if let Some(ref alias) = alias + && (table_scan.projection.is_some() || !table_scan.filters.is_empty()) + { + builder = builder.alias(alias.clone())?; } // Avoid creating a duplicate Projection node, which would result in an additional subquery if a projection already exists. // For example, if the `optimize_projection` rule is applied, there will be a Projection node, and duplicate projection // information included in the TableScan node. - if !already_projected { - if let Some(project_vec) = &table_scan.projection { - if project_vec.is_empty() { - builder = builder.project(vec![Expr::Literal( - ScalarValue::Int64(Some(1)), - None, - )])?; - } else { - let project_columns = project_vec - .iter() - .cloned() - .map(|i| { - let schema = table_scan.source.schema(); - let field = schema.field(i); - if alias.is_some() { - Column::new(alias.clone(), field.name().clone()) - } else { - Column::new( - Some(table_scan.table_name.clone()), - field.name().clone(), - ) - } - }) - .collect::>(); - builder = builder.project(project_columns)?; - }; - } + if !already_projected && let Some(project_vec) = &table_scan.projection { + if project_vec.is_empty() { + builder = builder.project(vec![Expr::Literal( + ScalarValue::Int64(Some(1)), + None, + )])?; + } else { + let project_columns = project_vec + .iter() + .cloned() + .map(|i| { + let schema = table_scan.source.schema(); + let field = schema.field(i); + if alias.is_some() { + Column::new(alias.clone(), field.name().clone()) + } else { + Column::new( + Some(table_scan.table_name.clone()), + field.name().clone(), + ) + } + }) + .collect::>(); + builder = builder.project(project_columns)?; + }; } let filter_expr: Result> = table_scan @@ -1159,10 +1154,11 @@ impl Unparser<'_> { // So we will append the alias to this subquery. // Example: // select * from t1 limit 10 -> (select * from t1 limit 10) as a - if let Some(alias) = alias { - if table_scan.projection.is_none() && table_scan.filters.is_empty() { - builder = builder.alias(alias)?; - } + if let Some(alias) = alias + && table_scan.projection.is_none() + && table_scan.filters.is_empty() + { + builder = builder.alias(alias)?; } Ok(Some(builder.build()?)) @@ -1173,11 +1169,11 @@ impl Unparser<'_> { Some(subquery_alias.alias.clone()), already_projected, )?; - if let Some(alias) = alias { - if let Some(plan) = ret { - let plan = LogicalPlanBuilder::new(plan).alias(alias)?.build()?; - return Ok(Some(plan)); - } + if let Some(alias) = alias + && let Some(plan) = ret + { + let plan = LogicalPlanBuilder::new(plan).alias(alias)?.build()?; + return Ok(Some(plan)); } Ok(ret) } diff --git a/datafusion/sql/src/unparser/rewrite.rs b/datafusion/sql/src/unparser/rewrite.rs index 1b6c3433f79f..ec1b17cd28a9 100644 --- a/datafusion/sql/src/unparser/rewrite.rs +++ b/datafusion/sql/src/unparser/rewrite.rs @@ -20,8 +20,8 @@ use std::{collections::HashSet, sync::Arc}; use arrow::datatypes::Schema; use datafusion_common::tree_node::TreeNodeContainer; use datafusion_common::{ - tree_node::{Transformed, TransformedResult, TreeNode, TreeNodeRewriter}, Column, HashMap, Result, TableReference, + tree_node::{Transformed, TransformedResult, TreeNode, TreeNodeRewriter}, }; use datafusion_expr::expr::{Alias, UNNEST_COLUMN_PREFIX}; use datafusion_expr::{Expr, LogicalPlan, Projection, Sort, SortExpr}; @@ -311,7 +311,7 @@ pub(super) fn subquery_alias_inner_query_and_columns( // Projection: j1.j1_id AS id // Projection: j1.j1_id for (i, inner_expr) in inner_projection.expr.iter().enumerate() { - let Expr::Alias(ref outer_alias) = &outer_projections.expr[i] else { + let Expr::Alias(outer_alias) = &outer_projections.expr[i] else { return (plan, vec![]); }; @@ -360,15 +360,14 @@ pub(super) fn find_unnest_column_alias( if projection.expr.len() != 1 { return (plan, None); } - if let Some(Expr::Alias(alias)) = projection.expr.first() { - if alias + if let Some(Expr::Alias(alias)) = projection.expr.first() + && alias .expr .schema_name() .to_string() .starts_with(&format!("{UNNEST_COLUMN_PREFIX}(")) - { - return (projection.input.as_ref(), Some(alias.name.clone())); - } + { + return (projection.input.as_ref(), Some(alias.name.clone())); } } (plan, None) diff --git a/datafusion/sql/src/unparser/utils.rs b/datafusion/sql/src/unparser/utils.rs index 40d0d441364a..f539c0ddc1e8 100644 --- a/datafusion/sql/src/unparser/utils.rs +++ b/datafusion/sql/src/unparser/utils.rs @@ -18,17 +18,17 @@ use std::{cmp::Ordering, sync::Arc, vec}; use super::{ - dialect::CharacterLengthStyle, dialect::DateFieldExtractStyle, - rewrite::TableAliasRewriter, Unparser, + Unparser, dialect::CharacterLengthStyle, dialect::DateFieldExtractStyle, + rewrite::TableAliasRewriter, }; use datafusion_common::{ - assert_eq_or_internal_err, internal_err, + Column, DataFusionError, Result, ScalarValue, assert_eq_or_internal_err, + internal_err, tree_node::{Transformed, TransformedResult, TreeNode}, - Column, DataFusionError, Result, ScalarValue, }; use datafusion_expr::{ - expr, utils::grouping_set_to_exprlist, Aggregate, Expr, LogicalPlan, - LogicalPlanBuilder, Projection, SortExpr, Unnest, Window, + Aggregate, Expr, LogicalPlan, LogicalPlanBuilder, Projection, SortExpr, Unnest, + Window, expr, utils::grouping_set_to_exprlist, }; use indexmap::IndexSet; @@ -166,14 +166,12 @@ pub(crate) fn unproject_unnest_expr(expr: Expr, unnest: &Unnest) -> Result // Check if the column is among the columns to run unnest on. // Currently, only List/Array columns (defined in `list_type_columns`) are supported for unnesting. if unnest.list_type_columns.iter().any(|e| e.1.output_column.name == col_ref.name) { - if let Ok(idx) = unnest.schema.index_of_column(col_ref) { - if let LogicalPlan::Projection(Projection { expr, .. }) = unnest.input.as_ref() { - if let Some(unprojected_expr) = expr.get(idx) { + if let Ok(idx) = unnest.schema.index_of_column(col_ref) + && let LogicalPlan::Projection(Projection { expr, .. }) = unnest.input.as_ref() + && let Some(unprojected_expr) = expr.get(idx) { let unnest_expr = Expr::Unnest(expr::Unnest::new(unprojected_expr.clone())); return Ok(Transformed::yes(unnest_expr)); } - } - } return internal_err!( "Tried to unproject unnest expr for column '{}' that was not found in the provided Unnest!", &col_ref.name ); @@ -291,14 +289,14 @@ pub(crate) fn unproject_sort_expr( } // In case of aggregation there could be columns containing aggregation functions we need to unproject - if let Some(agg) = agg { - if agg.schema.is_column_from_schema(&col) { - return Ok(Transformed::yes(unproject_agg_exprs( - Expr::Column(col), - agg, - None, - )?)); - } + if let Some(agg) = agg + && agg.schema.is_column_from_schema(&col) + { + return Ok(Transformed::yes(unproject_agg_exprs( + Expr::Column(col), + agg, + None, + )?)); } // If SELECT and ORDER BY contain the same expression with a scalar function, the ORDER BY expression will @@ -306,14 +304,12 @@ pub(crate) fn unproject_sort_expr( // to transform it back to the actual expression. if let LogicalPlan::Projection(Projection { expr, schema, .. }) = input + && let Ok(idx) = schema.index_of_column(&col) + && let Some(Expr::ScalarFunction(scalar_fn)) = expr.get(idx) { - if let Ok(idx) = schema.index_of_column(&col) { - if let Some(Expr::ScalarFunction(scalar_fn)) = expr.get(idx) { - return Ok(Transformed::yes(Expr::ScalarFunction( - scalar_fn.clone(), - ))); - } - } + return Ok(Transformed::yes(Expr::ScalarFunction( + scalar_fn.clone(), + ))); } Ok(Transformed::no(Expr::Column(col))) diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs index 17618c483193..af2e1c79427c 100644 --- a/datafusion/sql/src/utils.rs +++ b/datafusion/sql/src/utils.rs @@ -20,14 +20,14 @@ use std::vec; use arrow::datatypes::{ - DataType, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE, + DECIMAL_DEFAULT_SCALE, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION, DataType, }; use datafusion_common::tree_node::{ Transformed, TransformedResult, TreeNode, TreeNodeRecursion, TreeNodeRewriter, }; use datafusion_common::{ - assert_or_internal_err, exec_datafusion_err, exec_err, internal_err, plan_err, Column, DFSchemaRef, Diagnostic, HashMap, Result, ScalarValue, + assert_or_internal_err, exec_datafusion_err, exec_err, internal_err, plan_err, }; use datafusion_expr::builder::get_struct_unnested_columns; use datafusion_expr::expr::{ @@ -35,7 +35,7 @@ use datafusion_expr::expr::{ }; use datafusion_expr::utils::{expr_as_column_expr, find_column_exprs}; use datafusion_expr::{ - col, expr_vec_fmt, ColumnUnnestList, Expr, ExprSchemable, LogicalPlan, + ColumnUnnestList, Expr, ExprSchemable, LogicalPlan, col, expr_vec_fmt, }; use indexmap::IndexMap; @@ -124,7 +124,9 @@ impl CheckColumnsSatisfyExprsPurpose { } fn diagnostic_message(&self, expr: &Expr) -> String { - format!("'{expr}' must appear in GROUP BY clause because it's not an aggregate expression") + format!( + "'{expr}' must appear in GROUP BY clause because it's not an aggregate expression" + ) } } @@ -223,7 +225,8 @@ pub(crate) fn resolve_positions_to_exprs( } Expr::Literal(ScalarValue::Int64(Some(position)), _) => plan_err!( "Cannot find column with position {} in SELECT clause. Valid columns: 1 to {}", - position, select_exprs.len() + position, + select_exprs.len() ), _ => Ok(expr), } @@ -292,7 +295,7 @@ pub(crate) fn make_decimal_type( (Some(p), Some(s)) => (p as u8, s as i8), (Some(p), None) => (p as u8, 0), (None, Some(_)) => { - return plan_err!("Cannot specify only scale for decimal data type") + return plan_err!("Cannot specify only scale for decimal data type"); } (None, None) => (DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE), }; @@ -675,7 +678,7 @@ mod tests { use arrow::datatypes::{DataType as ArrowDataType, Field, Fields, Schema}; use datafusion_common::{Column, DFSchema, Result}; use datafusion_expr::{ - col, lit, unnest, ColumnUnnestList, EmptyRelation, LogicalPlan, + ColumnUnnestList, EmptyRelation, LogicalPlan, col, lit, unnest, }; use datafusion_functions::core::expr_ext::FieldAccessor; use datafusion_functions_aggregate::expr_fn::count; @@ -747,13 +750,15 @@ mod tests { // Only the bottom most unnest exprs are transformed assert_eq!( transformed_exprs, - vec![col("__unnest_placeholder(3d_col,depth=2)") - .alias("UNNEST(UNNEST(3d_col))") - .add( - col("__unnest_placeholder(3d_col,depth=2)") - .alias("UNNEST(UNNEST(3d_col))") - ) - .add(col("i64_col"))] + vec![ + col("__unnest_placeholder(3d_col,depth=2)") + .alias("UNNEST(UNNEST(3d_col))") + .add( + col("__unnest_placeholder(3d_col,depth=2)") + .alias("UNNEST(UNNEST(3d_col))") + ) + .add(col("i64_col")) + ] ); column_unnests_eq( vec![ @@ -789,7 +794,9 @@ mod tests { ] ); column_unnests_eq( - vec!["__unnest_placeholder(3d_col)=>[__unnest_placeholder(3d_col,depth=2)|depth=2, __unnest_placeholder(3d_col,depth=1)|depth=1]"], + vec![ + "__unnest_placeholder(3d_col)=>[__unnest_placeholder(3d_col,depth=2)|depth=2, __unnest_placeholder(3d_col,depth=1)|depth=1]", + ], &unnest_placeholder_columns, ); // Still reference struct_col in original schema but with alias, @@ -881,9 +888,11 @@ mod tests { // Only transform the unnest children assert_eq!( transformed_exprs, - vec![col("__unnest_placeholder(array_col,depth=1)") - .alias("UNNEST(array_col)") - .add(lit(1i64))] + vec![ + col("__unnest_placeholder(array_col,depth=1)") + .alias("UNNEST(array_col)") + .add(lit(1i64)) + ] ); // Keep appending to the current vector diff --git a/datafusion/sql/tests/cases/collection.rs b/datafusion/sql/tests/cases/collection.rs index 59704d6445b3..06a876dcfc9e 100644 --- a/datafusion/sql/tests/cases/collection.rs +++ b/datafusion/sql/tests/cases/collection.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use datafusion_common::{assert_contains, DataFusionError}; +use datafusion_common::{DataFusionError, assert_contains}; use datafusion_sql::planner::SqlToRel; use sqlparser::{dialect::GenericDialect, parser::Parser}; @@ -42,9 +42,11 @@ fn test_collect_select_items() { let error = do_query(query); let errors = error.iter().collect::>(); assert_eq!(errors.len(), 2); - assert!(errors[0] - .to_string() - .contains("No field named first_namex.")); + assert!( + errors[0] + .to_string() + .contains("No field named first_namex.") + ); assert_contains!(errors[1].to_string(), "No field named last_namex."); } diff --git a/datafusion/sql/tests/cases/diagnostic.rs b/datafusion/sql/tests/cases/diagnostic.rs index 7ae839851d04..7a729739469d 100644 --- a/datafusion/sql/tests/cases/diagnostic.rs +++ b/datafusion/sql/tests/cases/diagnostic.rs @@ -204,8 +204,7 @@ fn test_ambiguous_reference() -> Result<()> { #[test] fn test_incompatible_types_binary_arithmetic() -> Result<()> { - let query = - "SELECT /*whole+left*/id/*left*/ + /*right*/first_name/*right+whole*/ FROM person"; + let query = "SELECT /*whole+left*/id/*left*/ + /*right*/first_name/*right+whole*/ FROM person"; let spans = get_spans(query); let diag = do_query(query); assert_snapshot!(diag.message, @"expressions have incompatible types"); diff --git a/datafusion/sql/tests/cases/params.rs b/datafusion/sql/tests/cases/params.rs index a697fa460bb6..0d5f09142e6e 100644 --- a/datafusion/sql/tests/cases/params.rs +++ b/datafusion/sql/tests/cases/params.rs @@ -18,9 +18,8 @@ use crate::logical_plan; use arrow::datatypes::{DataType, Field, FieldRef}; use datafusion_common::{ - assert_contains, - metadata::{format_type_and_metadata, ScalarAndMetadata}, - ParamValues, ScalarValue, + ParamValues, ScalarValue, assert_contains, + metadata::{ScalarAndMetadata, format_type_and_metadata}, }; use datafusion_expr::{LogicalPlan, Prepare, Statement}; use insta::assert_snapshot; @@ -129,10 +128,12 @@ fn test_prepare_statement_to_plan_panic_prepare_wrong_syntax() { // param is not number following the $ sign // panic due to error returned from the parser let sql = "PREPARE AS SELECT id, age FROM person WHERE age = $foo"; - assert!(logical_plan(sql) - .unwrap_err() - .strip_backtrace() - .contains("Expected: AS, found: SELECT")) + assert!( + logical_plan(sql) + .unwrap_err() + .strip_backtrace() + .contains("Expected: AS, found: SELECT") + ) } #[test] @@ -375,8 +376,7 @@ fn test_prepare_statement_to_plan_params_as_constants() { #[test] fn test_infer_types_from_join() { let test = ParameterTest { - sql: - "SELECT id, order_id FROM person JOIN orders ON id = customer_id and age = $1", + sql: "SELECT id, order_id FROM person JOIN orders ON id = customer_id and age = $1", expected_types: vec![("$1", Some(DataType::Int32))], param_values: vec![ScalarValue::Int32(Some(10))], }; @@ -403,7 +403,7 @@ fn test_prepare_statement_infer_types_from_join() { let test = ParameterTest { sql: "PREPARE my_plan AS SELECT id, order_id FROM person JOIN orders ON id = customer_id and age = $1", expected_types: vec![("$1", Some(DataType::Int32))], - param_values: vec![ScalarValue::Int32(Some(10))] + param_values: vec![ScalarValue::Int32(Some(10))], }; assert_snapshot!( @@ -527,7 +527,7 @@ fn test_infer_types_subquery() { let test = ParameterTest { sql: "SELECT id, age FROM person WHERE age = (select max(age) from person where id = $1)", expected_types: vec![("$1", Some(DataType::UInt32))], - param_values: vec![ScalarValue::UInt32(Some(10))] + param_values: vec![ScalarValue::UInt32(Some(10))], }; assert_snapshot!( @@ -560,7 +560,7 @@ fn test_prepare_statement_infer_types_subquery() { let test = ParameterTest { sql: "PREPARE my_plan AS SELECT id, age FROM person WHERE age = (select max(age) from person where id = $1)", expected_types: vec![("$1", Some(DataType::UInt32))], - param_values: vec![ScalarValue::UInt32(Some(10))] + param_values: vec![ScalarValue::UInt32(Some(10))], }; assert_snapshot!( @@ -690,7 +690,7 @@ fn test_prepare_statement_insert_infer() { ScalarValue::UInt32(Some(1)), ScalarValue::from("Alan"), ScalarValue::from("Turing"), - ] + ], }; assert_snapshot!( test.run(), @@ -788,7 +788,7 @@ fn test_update_infer_with_metadata() { let test = ParameterTestWithMetadata { sql: "PREPARE my_plan AS update person_with_uuid_extension set last_name=$1 where id=$2", expected_types, - param_values + param_values, }; assert_snapshot!( @@ -839,7 +839,7 @@ fn test_insert_infer_with_metadata() { let test = ParameterTestWithMetadata { sql: "insert into person_with_uuid_extension (id, first_name, last_name) values ($1, $2, $3)", expected_types: expected_types.clone(), - param_values: param_values.clone() + param_values: param_values.clone(), }; assert_snapshot!( @@ -860,7 +860,7 @@ fn test_insert_infer_with_metadata() { let test = ParameterTestWithMetadata { sql: "PREPARE my_plan AS insert into person_with_uuid_extension (id, first_name, last_name) values ($1, $2, $3)", expected_types, - param_values + param_values, }; assert_snapshot!( @@ -1058,5 +1058,8 @@ fn test_prepare_statement_bad_list_idx() { let param_values = ParamValues::List(vec![]); let err = plan.replace_params_with_values(¶m_values).unwrap_err(); - assert_contains!(err.to_string(), "Error during planning: Failed to parse placeholder id: invalid digit found in string"); + assert_contains!( + err.to_string(), + "Error during planning: Failed to parse placeholder id: invalid digit found in string" + ); } diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index 1c7813902ff7..725a7554fbe5 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -18,17 +18,17 @@ use arrow::datatypes::{DataType, Field, Schema}; use datafusion_common::{ - assert_contains, Column, DFSchema, DFSchemaRef, DataFusionError, Result, - TableReference, + Column, DFSchema, DFSchemaRef, DataFusionError, Result, TableReference, + assert_contains, }; use datafusion_expr::expr::{WindowFunction, WindowFunctionParams}; use datafusion_expr::test::function_stub::{ count_udaf, max_udaf, min_udaf, sum, sum_udaf, }; use datafusion_expr::{ - cast, col, lit, table_scan, wildcard, EmptyRelation, Expr, Extension, LogicalPlan, - LogicalPlanBuilder, Union, UserDefinedLogicalNode, UserDefinedLogicalNodeCore, - WindowFrame, WindowFunctionDefinition, + EmptyRelation, Expr, Extension, LogicalPlan, LogicalPlanBuilder, Union, + UserDefinedLogicalNode, UserDefinedLogicalNodeCore, WindowFrame, + WindowFunctionDefinition, cast, col, lit, table_scan, wildcard, }; use datafusion_functions::unicode; use datafusion_functions_aggregate::grouping::grouping_udaf; @@ -41,7 +41,7 @@ use datafusion_sql::unparser::dialect::{ DefaultDialect, Dialect as UnparserDialect, MySqlDialect as UnparserMySqlDialect, PostgreSqlDialect as UnparserPostgreSqlDialect, SqliteDialect, }; -use datafusion_sql::unparser::{expr_to_sql, plan_to_sql, Unparser}; +use datafusion_sql::unparser::{Unparser, expr_to_sql, plan_to_sql}; use insta::assert_snapshot; use sqlparser::ast::Statement; use std::hash::Hash; @@ -2071,7 +2071,8 @@ fn test_unparse_extension_to_statement() -> Result<()> { if let Some(err) = plan_to_sql(&extension).err() { assert_contains!( err.to_string(), - "This feature is not implemented: Unsupported extension node: MockUserDefinedLogicalPlan"); + "This feature is not implemented: Unsupported extension node: MockUserDefinedLogicalPlan" + ); } else { panic!("Expected error"); } @@ -2175,11 +2176,9 @@ fn test_unparse_optimized_multi_union() -> Result<()> { ); let plan = LogicalPlan::Union(Union { - inputs: vec![project( - empty.clone(), - vec![lit(1).alias("x"), lit("a").alias("y")], - )? - .into()], + inputs: vec![ + project(empty.clone(), vec![lit(1).alias("x"), lit("a").alias("y")])?.into(), + ], schema: dfschema.clone(), }); diff --git a/datafusion/sql/tests/common/mod.rs b/datafusion/sql/tests/common/mod.rs index 5d9fd9f2c374..44dd7cec89cb 100644 --- a/datafusion/sql/tests/common/mod.rs +++ b/datafusion/sql/tests/common/mod.rs @@ -24,7 +24,7 @@ use std::{sync::Arc, vec}; use arrow::datatypes::*; use datafusion_common::config::ConfigOptions; use datafusion_common::file_options::file_type::FileType; -use datafusion_common::{plan_err, DFSchema, GetExt, Result, TableReference}; +use datafusion_common::{DFSchema, GetExt, Result, TableReference, plan_err}; use datafusion_expr::planner::{ExprPlanner, PlannerResult, TypePlanner}; use datafusion_expr::{AggregateUDF, Expr, ScalarUDF, TableSource, WindowUDF}; use datafusion_functions_nested::expr_fn::make_array; diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index c52b09afce92..c19049df3ef8 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -27,11 +27,11 @@ use std::vec; use arrow::datatypes::{TimeUnit::Nanosecond, *}; use common::MockContextProvider; -use datafusion_common::{assert_contains, DataFusionError, Result}; +use datafusion_common::{DataFusionError, Result, assert_contains}; use datafusion_expr::{ - col, logical_plan::LogicalPlan, test::function_stub::sum_udaf, ColumnarValue, - CreateIndex, DdlStatement, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, - Volatility, + ColumnarValue, CreateIndex, DdlStatement, ScalarFunctionArgs, ScalarUDF, + ScalarUDFImpl, Signature, Volatility, col, logical_plan::LogicalPlan, + test::function_stub::sum_udaf, }; use datafusion_functions::{string, unicode}; use datafusion_sql::{ @@ -1339,8 +1339,8 @@ fn select_aggregate_with_group_by_with_having_using_column_by_alias() { } #[test] -fn select_aggregate_with_group_by_with_having_using_columns_with_and_without_their_aliases( -) { +fn select_aggregate_with_group_by_with_having_using_columns_with_and_without_their_aliases() + { let sql = "SELECT first_name AS fn, MAX(age) AS max_age FROM person GROUP BY first_name @@ -1447,8 +1447,8 @@ fn select_aggregate_aliased_with_group_by_with_having_referencing_aggregate_by_i } #[test] -fn select_aggregate_compound_aliased_with_group_by_with_having_referencing_compound_aggregate_by_its_alias( -) { +fn select_aggregate_compound_aliased_with_group_by_with_having_referencing_compound_aggregate_by_its_alias() + { let sql = "SELECT first_name, MAX(age) + 1 AS max_age_plus_one FROM person GROUP BY first_name @@ -1466,8 +1466,8 @@ fn select_aggregate_compound_aliased_with_group_by_with_having_referencing_compo } #[test] -fn select_aggregate_with_group_by_with_having_using_derived_column_aggregate_not_in_select( -) { +fn select_aggregate_with_group_by_with_having_using_derived_column_aggregate_not_in_select() + { let sql = "SELECT first_name, MAX(age) FROM person GROUP BY first_name @@ -2384,7 +2384,7 @@ fn create_external_table_with_compression_type() { "CREATE EXTERNAL TABLE t(c1 int) STORED AS JSON LOCATION 'foo.json.gz' OPTIONS ('format.compression' 'gzip')", "CREATE EXTERNAL TABLE t(c1 int) STORED AS JSON LOCATION 'foo.json.bz2' OPTIONS ('format.compression' 'bzip2')", "CREATE EXTERNAL TABLE t(c1 int) STORED AS NONSTANDARD LOCATION 'foo.unk' OPTIONS ('format.compression' 'gzip')", - ]; + ]; allow_duplicates! { for sql in sqls { @@ -3070,8 +3070,7 @@ Projection: orders.order_id, max(orders.qty) PARTITION BY [orders.order_id] ORDE /// ``` #[test] fn over_partition_by_order_by_no_dup() { - let sql = - "SELECT order_id, MAX(qty) OVER (PARTITION BY order_id, qty ORDER BY qty) from orders"; + let sql = "SELECT order_id, MAX(qty) OVER (PARTITION BY order_id, qty ORDER BY qty) from orders"; let plan = logical_plan(sql).unwrap(); assert_snapshot!( plan, @@ -3097,8 +3096,7 @@ Projection: orders.order_id, max(orders.qty) PARTITION BY [orders.order_id, orde /// ``` #[test] fn over_partition_by_order_by_mix_up() { - let sql = - "SELECT order_id, MAX(qty) OVER (PARTITION BY order_id, qty ORDER BY qty), MIN(qty) OVER (PARTITION BY qty ORDER BY order_id) from orders"; + let sql = "SELECT order_id, MAX(qty) OVER (PARTITION BY order_id, qty ORDER BY qty), MIN(qty) OVER (PARTITION BY qty ORDER BY order_id) from orders"; let plan = logical_plan(sql).unwrap(); assert_snapshot!( plan, @@ -3124,8 +3122,7 @@ Projection: orders.order_id, max(orders.qty) PARTITION BY [orders.order_id, orde /// FIXME: for now we are not detecting prefix of sorting keys in order to save one sort exec phase #[test] fn over_partition_by_order_by_mix_up_prefix() { - let sql = - "SELECT order_id, MAX(qty) OVER (PARTITION BY order_id ORDER BY qty), MIN(qty) OVER (PARTITION BY order_id, qty ORDER BY price) from orders"; + let sql = "SELECT order_id, MAX(qty) OVER (PARTITION BY order_id ORDER BY qty), MIN(qty) OVER (PARTITION BY order_id, qty ORDER BY price) from orders"; let plan = logical_plan(sql).unwrap(); assert_snapshot!( plan, @@ -3670,8 +3667,7 @@ Projection: p.id #[test] fn scalar_subquery() { - let sql = - "SELECT p.id, (SELECT MAX(id) FROM person WHERE last_name = p.last_name) FROM person p"; + let sql = "SELECT p.id, (SELECT MAX(id) FROM person WHERE last_name = p.last_name) FROM person p"; let plan = logical_plan(sql).unwrap(); assert_snapshot!( plan, @@ -4391,8 +4387,7 @@ fn test_select_unsupported_syntax_errors(#[case] sql: &str, #[case] error: &str) #[test] fn select_order_by_with_cast() { - let sql = - "SELECT first_name AS first_name FROM (SELECT first_name AS first_name FROM person) ORDER BY CAST(first_name as INT)"; + let sql = "SELECT first_name AS first_name FROM (SELECT first_name AS first_name FROM person) ORDER BY CAST(first_name as INT)"; let plan = logical_plan(sql).unwrap(); assert_snapshot!( plan, From eb24d7e50989148d21008963825b4c29ad1da682 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Wed, 10 Dec 2025 13:05:36 +0100 Subject: [PATCH 2/2] cargo fmt --- datafusion/sql/src/relation/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sql/src/relation/mod.rs b/datafusion/sql/src/relation/mod.rs index 4ad37435115d..3115d8dfffbd 100644 --- a/datafusion/sql/src/relation/mod.rs +++ b/datafusion/sql/src/relation/mod.rs @@ -360,7 +360,7 @@ fn optimize_subquery_sort(plan: LogicalPlan) -> Result> // 2. RANK / ROW_NUMBER ... => Handled by a `WindowAggr` and its requirements. // 3. LIMIT => Handled by a `Sort`, so we need to search for it. let mut has_limit = false; - + plan.transform_down(|c| { if let LogicalPlan::Limit(_) = c { has_limit = true;