diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index e21edeccdce60..8d8e4ae633576 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -20,7 +20,7 @@ use arrow::compute::can_cast_types; use datafusion_expr::binary::BinaryTypeCoercer; use itertools::{Itertools as _, izip}; -use std::sync::Arc; +use std::sync::{Arc, LazyLock}; use crate::analyzer::AnalyzerRule; use crate::utils::NamePreserver; @@ -91,11 +91,11 @@ impl AnalyzerRule for TypeCoercion { } fn analyze(&self, plan: LogicalPlan, config: &ConfigOptions) -> Result { - let empty_schema = DFSchema::empty(); + static EMPTY_SCHEMA: LazyLock = LazyLock::new(DFSchema::empty); // recurse let transformed_plan = plan - .transform_up_with_subqueries(|plan| analyze_internal(&empty_schema, plan))? + .transform_up_with_subqueries(|plan| analyze_internal(&EMPTY_SCHEMA, plan))? .data; // finish diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 2096c42770315..213f2b37bc082 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -325,11 +325,7 @@ impl CommonSubexprEliminate { .map(|expr| Some(name_preserver.save(expr))) .collect::>() } else { - new_aggr_expr - .clone() - .into_iter() - .map(|_| None) - .collect::>() + (0..new_aggr_expr.len()).map(|_| None).collect() }; let mut agg_exprs = common_exprs diff --git a/datafusion/optimizer/src/optimize_unions.rs b/datafusion/optimizer/src/optimize_unions.rs index 900757b9a0607..80f8ebeef1697 100644 --- a/datafusion/optimizer/src/optimize_unions.rs +++ b/datafusion/optimizer/src/optimize_unions.rs @@ -64,11 +64,11 @@ impl OptimizerRule for OptimizeUnions { let inputs = inputs .into_iter() .flat_map(extract_plans_from_union) - .map(|plan| coerce_plan_expr_for_schema(plan, &schema)) + .map(|plan| Ok(Arc::new(coerce_plan_expr_for_schema(plan, &schema)?))) .collect::>>()?; Ok(Transformed::yes(LogicalPlan::Union(Union { - inputs: inputs.into_iter().map(Arc::new).collect_vec(), + inputs, schema, }))) } diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index 1eb117f8abdce..76ed0129515fd 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -320,10 +320,8 @@ fn can_evaluate_as_join_condition(predicate: &Expr) -> Result { /// * do nothing. fn extract_or_clauses_for_join<'a>( filters: &'a [Expr], - schema: &'a DFSchema, + schema_cols: &'a HashSet, ) -> impl Iterator + 'a { - let schema_columns = schema_columns(schema); - // new formed OR clauses and their column references filters.iter().filter_map(move |expr| { if let Expr::BinaryExpr(BinaryExpr { @@ -332,8 +330,8 @@ fn extract_or_clauses_for_join<'a>( right, }) = expr { - let left_expr = extract_or_clause(left.as_ref(), &schema_columns); - let right_expr = extract_or_clause(right.as_ref(), &schema_columns); + let left_expr = extract_or_clause(left.as_ref(), schema_cols); + let right_expr = extract_or_clause(right.as_ref(), schema_cols); // If nothing can be extracted from any sub clauses, do nothing for this OR clause. if let (Some(left_expr), Some(right_expr)) = (left_expr, right_expr) { @@ -421,6 +419,10 @@ fn push_down_all_join( // 3) should be kept as filter conditions let left_schema = join.left.schema(); let right_schema = join.right.schema(); + + let left_schema_columns = schema_columns(left_schema.as_ref()); + let right_schema_columns = schema_columns(right_schema.as_ref()); + let mut left_push = vec![]; let mut right_push = vec![]; let mut keep_predicates = vec![]; @@ -467,12 +469,24 @@ fn push_down_all_join( // Extract from OR clause, generate new predicates for both side of join if possible. // We only track the unpushable predicates above. if left_preserved { - left_push.extend(extract_or_clauses_for_join(&keep_predicates, left_schema)); - left_push.extend(extract_or_clauses_for_join(&join_conditions, left_schema)); + left_push.extend(extract_or_clauses_for_join( + &keep_predicates, + &left_schema_columns, + )); + left_push.extend(extract_or_clauses_for_join( + &join_conditions, + &left_schema_columns, + )); } if right_preserved { - right_push.extend(extract_or_clauses_for_join(&keep_predicates, right_schema)); - right_push.extend(extract_or_clauses_for_join(&join_conditions, right_schema)); + right_push.extend(extract_or_clauses_for_join( + &keep_predicates, + &right_schema_columns, + )); + right_push.extend(extract_or_clauses_for_join( + &join_conditions, + &right_schema_columns, + )); } // For predicates from join filter, we should check with if a join side is preserved @@ -480,13 +494,13 @@ fn push_down_all_join( if on_left_preserved { left_push.extend(extract_or_clauses_for_join( &on_filter_join_conditions, - left_schema, + &left_schema_columns, )); } if on_right_preserved { right_push.extend(extract_or_clauses_for_join( &on_filter_join_conditions, - right_schema, + &right_schema_columns, )); } diff --git a/datafusion/optimizer/src/push_down_limit.rs b/datafusion/optimizer/src/push_down_limit.rs index 755e192e340d9..4a26cd5884f6b 100644 --- a/datafusion/optimizer/src/push_down_limit.rs +++ b/datafusion/optimizer/src/push_down_limit.rs @@ -47,12 +47,12 @@ impl OptimizerRule for PushDownLimit { true } + #[expect(clippy::only_used_in_recursion)] fn rewrite( &self, plan: LogicalPlan, config: &dyn OptimizerConfig, ) -> Result> { - let _ = config.options(); let LogicalPlan::Limit(mut limit) = plan else { return Ok(Transformed::no(plan)); }; diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 12449ed2f4d55..d1939dc72eb53 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -26,6 +26,7 @@ use std::borrow::Cow; use std::collections::HashSet; use std::ops::Not; use std::sync::Arc; +use std::sync::LazyLock; use datafusion_common::config::ConfigOptions; use datafusion_common::nested_struct::has_one_of_more_common_fields; @@ -498,8 +499,6 @@ struct ConstEvaluator { /// The `config_options` are passed from the session to allow scalar functions /// to access configuration like timezone. execution_props: ExecutionProps, - input_schema: DFSchema, - input_batch: RecordBatch, } /// The simplify result of ConstEvaluator @@ -575,6 +574,18 @@ impl TreeNodeRewriter for ConstEvaluator { } } +static DUMMY_SCHEMA: LazyLock> = + LazyLock::new(|| Arc::new(Schema::new(vec![Field::new(".", DataType::Null, true)]))); + +static DUMMY_DF_SCHEMA: LazyLock = + LazyLock::new(|| DFSchema::try_from(Arc::clone(&*DUMMY_SCHEMA)).unwrap()); + +static DUMMY_BATCH: LazyLock = LazyLock::new(|| { + // Need a single "input" row to produce a single output row + let col = new_null_array(&DataType::Null, 1); + RecordBatch::try_new(DUMMY_SCHEMA.clone(), vec![col]).unwrap() +}); + impl ConstEvaluator { /// Create a new `ConstantEvaluator`. /// @@ -588,16 +599,6 @@ impl ConstEvaluator { pub fn try_new(config_options: Option>) -> Result { // The dummy column name is unused and doesn't matter as only // expressions without column references can be evaluated - static DUMMY_COL_NAME: &str = "."; - let schema = Arc::new(Schema::new(vec![Field::new( - DUMMY_COL_NAME, - DataType::Null, - true, - )])); - let input_schema = DFSchema::try_from(Arc::clone(&schema))?; - // Need a single "input" row to produce a single output row - let col = new_null_array(&DataType::Null, 1); - let input_batch = RecordBatch::try_new(schema, vec![col])?; let mut execution_props = ExecutionProps::new(); execution_props.config_options = config_options; @@ -605,8 +606,6 @@ impl ConstEvaluator { Ok(Self { can_evaluate: vec![], execution_props, - input_schema, - input_batch, }) } @@ -702,16 +701,13 @@ impl ConstEvaluator { return ConstSimplifyResult::NotSimplified(s, m); } - let phys_expr = match create_physical_expr( - &expr, - &self.input_schema, - &self.execution_props, - ) { - Ok(e) => e, - Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr), - }; + let phys_expr = + match create_physical_expr(&expr, &DUMMY_DF_SCHEMA, &self.execution_props) { + Ok(e) => e, + Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr), + }; let metadata = phys_expr - .return_field(self.input_batch.schema_ref()) + .return_field(DUMMY_BATCH.schema_ref()) .ok() .and_then(|f| { let m = f.metadata(); @@ -720,7 +716,7 @@ impl ConstEvaluator { false => Some(FieldMetadata::from(m)), } }); - let col_val = match phys_expr.evaluate(&self.input_batch) { + let col_val = match phys_expr.evaluate(&DUMMY_BATCH) { Ok(v) => v, Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr), }; @@ -1698,10 +1694,11 @@ impl TreeNodeRewriter for Simplifier<'_> { { // Repeated occurrences of wildcard are redundant so remove them // exp LIKE '%%' --> exp LIKE '%' - let simplified_pattern = Regex::new("%%+") - .unwrap() - .replace_all(pattern_str, "%") - .to_string(); + + static LIKE_REGEX: LazyLock = + LazyLock::new(|| Regex::new("%%+").unwrap()); + let simplified_pattern = + LIKE_REGEX.replace_all(pattern_str, "%").to_string(); Transformed::yes(Expr::Like(Like { pattern: Box::new( string_scalar.to_expr(&simplified_pattern),