From efed8681a764d91640fdb4cf5e7ef56ad88313ce Mon Sep 17 00:00:00 2001 From: Thomas Santerre Date: Mon, 22 Jun 2026 13:12:42 -0400 Subject: [PATCH] fix: apply recursive CTE column-list aliases to the static term `WITH RECURSIVE t(n) AS (SELECT 1 UNION ALL SELECT n + 1 FROM t WHERE n < 10)` failed to plan with `Schema error: No field named n. Valid fields are t."Int64(1)".`. The CTE's declared column-list names were applied (via `apply_table_alias`) only after the whole CTE plan was built, but the recursive working relation is derived from the schema of the static term before that, so the self-reference could never resolve the declared names. Apply the column-list aliases to the static term inside `recursive_cte`, before the work table is created, so the working relation and the self-reference expose the declared names. The relation-name alias is still added by the caller; on the recursive path the column re-alias is skipped to avoid a redundant projection on top of the `RecursiveQuery` node. The non-UNION fallback applies the aliases directly. Non-recursive CTEs are unchanged. Adds sqllogictest coverage for single- and multi-column column-list recursive CTEs, UNION (DISTINCT), and the column/alias-count mismatch error. 
--- datafusion/sql/src/cte.rs | 33 ++++++-- datafusion/sqllogictest/test_files/cte.slt | 89 ++++++++++++++++++++++ 2 files changed, 114 insertions(+), 8 deletions(-) diff --git a/datafusion/sql/src/cte.rs b/datafusion/sql/src/cte.rs index 31cb22f4efcac..f735b336018cc 100644 --- a/datafusion/sql/src/cte.rs +++ b/datafusion/sql/src/cte.rs @@ -21,11 +21,11 @@ use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use arrow::datatypes::{Schema, SchemaRef}; use datafusion_common::{ - Result, not_impl_err, plan_err, + Result, TableReference, not_impl_err, plan_err, tree_node::{TreeNode, TreeNodeRecursion}, }; use datafusion_expr::{LogicalPlan, LogicalPlanBuilder, TableSource}; -use sqlparser::ast::{Query, SetExpr, SetOperator, With}; +use sqlparser::ast::{Ident, Query, SetExpr, SetOperator, With}; impl<S: ContextProvider> SqlToRel<'_, S> { pub(super) fn plan_with_clause( @@ -46,14 +46,24 @@ impl<S: ContextProvider> SqlToRel<'_, S> { // Create a logical plan for the CTE let cte_plan = if is_recursive { - self.recursive_cte(&cte_name, *cte.query, planner_context)? + let columns = cte.alias.columns.iter().map(|c| c.name.clone()).collect(); + self.recursive_cte(&cte_name, columns, *cte.query, planner_context)? } else { self.non_recursive_cte(*cte.query, planner_context)? }; - // Each `WITH` block can change the column names in the last - // projection (e.g. "WITH table(t1, t2) AS SELECT 1, 2"). - let final_plan = self.apply_table_alias(cte_plan, cte.alias)?; + // Each `WITH` block can change the column names in the last projection + // (e.g. "WITH table(t1, t2) AS SELECT 1, 2"). Recursive CTEs apply those + // to the static term in recursive_cte(), so only the relation name here. + let final_plan = if is_recursive { + LogicalPlanBuilder::from(cte_plan) + .alias(TableReference::bare( + self.ident_normalizer.normalize(cte.alias.name), + ))? + .build()? + } else { + self.apply_table_alias(cte_plan, cte.alias)? 
+ }; // Export the CTE to the outer query planner_context.insert_cte(cte_name, final_plan); } @@ -71,6 +81,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> { fn recursive_cte( &self, cte_name: &str, + columns: Vec<Ident>, mut cte_query: Query, planner_context: &mut PlannerContext, ) -> Result<LogicalPlan> { @@ -91,9 +102,11 @@ impl<S: ContextProvider> SqlToRel<'_, S> { set_quantifier, } => (left, right, set_quantifier), other => { - // If the query is not a UNION, then it is not a recursive CTE + // Not a UNION, so not actually a recursive CTE. The caller adds only + // the relation name for recursive CTEs, so apply the column aliases here. *cte_query.body = other; - return self.non_recursive_cte(cte_query, planner_context); + let plan = self.non_recursive_cte(cte_query, planner_context)?; + return self.apply_expr_alias(plan, columns); } }; @@ -111,6 +124,10 @@ impl<S: ContextProvider> SqlToRel<'_, S> { // ---------- Step 1: Compile the static term ------------------ let static_plan = self.set_expr_to_plan(*left_expr, planner_context)?; + // Apply the declared column-list aliases (e.g. `t(n)`) to the static term, so + // the work table built from its schema below exposes the declared names. + let static_plan = self.apply_expr_alias(static_plan, columns)?; + // Since the recursive CTEs include a component that references a // table with its name, like the example below: // diff --git a/datafusion/sqllogictest/test_files/cte.slt b/datafusion/sqllogictest/test_files/cte.slt index 0b93f6fc10177..22c81eca87071 100644 --- a/datafusion/sqllogictest/test_files/cte.slt +++ b/datafusion/sqllogictest/test_files/cte.slt @@ -179,6 +179,95 @@ physical_plan 07)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 08)----------WorkTableExec: name=nodes +# recursive CTE with a column-list alias (e.g. 
`t(n)`): the declared names must be +# applied to the static term so the recursive self-reference can resolve them +query I rowsort +WITH RECURSIVE t(n) AS ( + SELECT 1 + UNION ALL + SELECT n + 1 FROM t WHERE n < 10 +) +SELECT n FROM t +---- +1 +10 +2 +3 +4 +5 +6 +7 +8 +9 + +# recursive CTE with a multi-column column-list alias +query II rowsort +WITH RECURSIVE t(a, b) AS ( + SELECT 1, 2 + UNION ALL + SELECT a + 1, b * 2 FROM t WHERE a < 5 +) +SELECT a, b FROM t +---- +1 2 +2 4 +3 8 +4 16 +5 32 + +# recursive CTE with a column-list alias and UNION (DISTINCT) +query I rowsort +WITH RECURSIVE t(n) AS ( + SELECT 1 + UNION + SELECT n + 1 FROM t WHERE n < 5 +) +SELECT n FROM t +---- +1 +2 +3 +4 +5 + +# recursive CTE column-list alias arity mismatch is rejected cleanly (raised at +# the static term, rather than the old confusing "No field named ...") +query error DataFusion error: Error during planning: Source table contains 1 columns but only 2 names given as column alias +WITH RECURSIVE t(a, b) AS ( + SELECT 1 + UNION ALL + SELECT a + 1 FROM t WHERE a < 3 +) +SELECT * FROM t + +# explain a column-list-aliased recursive CTE: the declared name is applied to +# the static term, so there is no extra projection on top of RecursiveQuery +query TT +EXPLAIN WITH RECURSIVE t(n) AS ( + SELECT 1 + UNION ALL + SELECT n + 1 FROM t WHERE n < 10 +) +SELECT * FROM t +---- +logical_plan +01)SubqueryAlias: t +02)--RecursiveQuery: is_distinct=false +03)----Projection: Int64(1) AS n +04)------EmptyRelation: rows=1 +05)----Projection: t.n + Int64(1) +06)------Filter: t.n < Int64(10) +07)--------TableScan: t projection=[n] +physical_plan +01)RecursiveQueryExec: name=t, is_distinct=false +02)--ProjectionExec: expr=[CAST(1 AS Int64) as n] +03)----PlaceholderRowExec +04)--CoalescePartitionsExec +05)----ProjectionExec: expr=[n@0 + 1 as n] +06)------FilterExec: n@0 < 10 +07)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +08)----------WorkTableExec: name=t + # simple 
deduplicating recursive CTE works query I WITH RECURSIVE nodes AS (