From b9922097e1407fc143fa98e97546b8d738489fac Mon Sep 17 00:00:00 2001 From: Luka Peschke Date: Thu, 18 Jun 2026 10:41:19 +0200 Subject: [PATCH] fix: allow a column to share its name with the source relation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A derived/selected column whose name matched the source relation (e.g. `from sales.sales | derive {sales = ...}`) clobbered the relation's input namespace, which holds the inference template and redirect used to resolve every other column — so all sibling columns failed with "Unknown name". Nest the shadowing column under a reserved `NS_SHADOWING_COL` key inside the input namespace instead of overwriting it. Leaf access (`this.sales`) resolves to the shadowing column with a clean emitted name, while qualified access (`sales.col`) and column inference still go through the preserved relation namespace. Also fix `deduplicate_select_items`, which conflated a table qualifier with a same-named column alias and dropped the aliased column. Co-Authored-By: Claude Opus 4.8 --- CHANGELOG.md | 4 + prqlc/prqlc/src/semantic/mod.rs | 7 + prqlc/prqlc/src/semantic/module.rs | 202 ++++++++++++++++++++- prqlc/prqlc/src/semantic/resolver/expr.rs | 25 ++- prqlc/prqlc/src/semantic/resolver/names.rs | 13 +- prqlc/prqlc/src/sql/gen_projection.rs | 10 +- 6 files changed, 251 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6131d087a82e..b2d9b637d231 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,10 @@ **Fixes**: +- Allow a derived column to share its name with the source relation (e.g. + `from sales.sales | derive {sales = ...}`), which previously failed to resolve + the remaining columns. + **Documentation**: **Web**: diff --git a/prqlc/prqlc/src/semantic/mod.rs b/prqlc/prqlc/src/semantic/mod.rs index 3d9f23396b63..3d91a4d889f3 100644 --- a/prqlc/prqlc/src/semantic/mod.rs +++ b/prqlc/prqlc/src/semantic/mod.rs @@ -111,6 +111,13 @@ pub const NS_INFER: &str = "_infer"; // implies we can infer new module declarations in the containing module pub const NS_INFER_MODULE: &str = "_infer_module"; +// when a column shadows a source relation's name (e.g. +// `from bar | derive {bar = ...}`), the shadowing column is nested inside the +// input namespace under this key. This keeps the input's inference template and +// qualified `bar.col` access intact, while leaf access to `bar` resolves to the +// shadowing column (see `Module::lookup` and the ident resolver). +pub const NS_SHADOWING_COL: &str = "_shadowing_col"; + impl Stmt { pub fn new(kind: StmtKind) -> Stmt { Stmt { diff --git a/prqlc/prqlc/src/semantic/module.rs b/prqlc/prqlc/src/semantic/module.rs index 4f841bd76162..24ead54f38f5 100644 --- a/prqlc/prqlc/src/semantic/module.rs +++ b/prqlc/prqlc/src/semantic/module.rs @@ -1,8 +1,8 @@ use std::collections::{HashMap, HashSet}; use super::{ - NS_DEFAULT_DB, NS_INFER, NS_INFER_MODULE, NS_MAIN, NS_PARAM, NS_QUERY_DEF, NS_SELF, NS_STD, - NS_THAT, NS_THIS, + NS_DEFAULT_DB, NS_INFER, NS_INFER_MODULE, NS_MAIN, NS_PARAM, NS_QUERY_DEF, NS_SELF, + NS_SHADOWING_COL, NS_STD, NS_THAT, NS_THIS, }; use crate::ir::decl::{Decl, DeclKind, Module, RootModule, TableDecl, TableExpr}; use crate::ir::pl::{Annotation, Expr, Ident, Lineage, LineageColumn}; @@ -163,6 +163,19 @@ impl Module { } } else if let Some(decl) = module.names.get(&prefix) { if let DeclKind::Module(inner) = &decl.kind { + // A column shadowing this relation's name (nested under + // `NS_SHADOWING_COL`) takes precedence for leaf access. We + // return the bare relation ident; the ident resolver then + // unwraps it to the nested column (keeping the emitted name + // clean). Qualified `prefix.col` access still recurses into + // the relation below. Gate on it being a `Column` to match + // the resolver's unwrap site, keeping the invariant explicit. + if matches!( + inner.names.get(NS_SHADOWING_COL).map(|d| &d.kind), + Some(DeclKind::Column(_)) + ) { + return HashSet::from([Ident::from_name(prefix)]); + } if inner.names.contains_key(NS_SELF) { return HashSet::from([Ident::from_path(vec![ prefix, @@ -284,7 +297,7 @@ impl Module { order: col_index + 1, ..Default::default() }; - ns.names.insert(name.name.clone(), decl); + insert_possibly_shadowing_col(ns, &name.name, decl); } _ => {} } @@ -301,7 +314,7 @@ impl Module { let namespace = self.names.entry(namespace.to_string()).or_default(); let namespace = namespace.kind.as_module_mut().unwrap(); - namespace.names.insert(name, DeclKind::Column(id).into()); + insert_possibly_shadowing_col(namespace, &name, DeclKind::Column(id).into()); } pub fn shadow(&mut self, ident: &str) { @@ -469,6 +482,29 @@ impl RootModule { } } +/// Insert a column into `namespace`. If the column's name collides with a +/// source input namespace (e.g. `from bar | derive {bar = ...}`), nest it +/// inside that namespace under [`NS_SHADOWING_COL`] rather than overwriting it. +/// The input namespace holds the `NS_INFER` template and is the redirect target +/// used to infer every other column, so clobbering it would break resolution of +/// all sibling columns, and removing it would break qualified `bar.col` access. +/// Leaf access to `bar` resolves to the nested shadowing column (see +/// [`Module::lookup`] and the ident resolver). +fn insert_possibly_shadowing_col(namespace: &mut Module, name: &str, decl: Decl) { + if let Some(Decl { + kind: DeclKind::Module(input), + .. + }) = namespace.names.get_mut(name) + { + // Last writer wins on a repeated shadow. This is safe because each + // pipeline step rebuilds the frame from lineage, so a single namespace + // never accumulates two shadows of the same name. + input.names.insert(NS_SHADOWING_COL.to_string(), decl); + } else { + namespace.names.insert(name.to_string(), decl); + } +} + pub fn ty_of_lineage(lineage: &Lineage) -> Ty { Ty::relation( lineage @@ -526,4 +562,162 @@ mod tests { module.unshadow("test_name"); assert_eq!(module.get(&ident).unwrap(), &decl); } + + // A column that shares its name with the source relation used to clobber + // the relation's input namespace, breaking inference of every other + // column. See the `insert_frame` collision handling above. + #[test] + fn test_column_shadows_relation_name() { + use insta::assert_snapshot; + + // minimal repro: derived column `bar` shadows source `bar` + assert_snapshot!(crate::tests::compile( + "from bar | derive { bar = this.a } | select { this.x, this.bar }" + ).unwrap(), @" + SELECT + x, + a AS bar + FROM + bar + "); + + // original source columns are still inferable after the collision + assert_snapshot!(crate::tests::compile( + "from bar | derive { bar = this.a } | select { this.a }" + ).unwrap(), @r" + SELECT + a + FROM + bar + "); + + // collision against a relation alias (not the table name) + assert_snapshot!(crate::tests::compile( + "from b=bar | derive { b = this.a } | select { this.x, this.b }" + ).unwrap(), @" + SELECT + x, + a AS b + FROM + bar AS b + "); + + // full real-world shape: group/aggregate/sort then a column named + // after the source relation + assert_snapshot!(crate::tests::compile( + "from sales.sales \ + | group { this.category } ( aggregate { col1 = sum this.amount } ) \ + | sort { this.category } \ + | derive { sales = this.col1 } \ + | select { this.category, this.sales }" + ).unwrap(), @" + WITH table_0 AS ( + SELECT + category, + COALESCE(SUM(amount), 0) AS _expr_0 + FROM + sales.sales + GROUP BY + category + ) + SELECT + category, + _expr_0 AS sales + FROM + table_0 + ORDER BY + category + "); + } + + // References to *other* columns of the shadowed relation within the *same* + // tuple must still resolve: nesting the shadowing column inside the input + // namespace (rather than overwriting it) keeps the input's inference + // template available for siblings resolved later in the tuple. + #[test] + fn test_column_shadows_relation_name_intra_tuple() { + use insta::assert_snapshot; + + // `this.x` should still resolve after `bar` shadows the source `bar` + // within the same `derive`. + assert_snapshot!(crate::tests::compile( + "from bar | derive { bar = this.a, x2 = this.x }" + ).unwrap(), @r" + SELECT + *, + a AS bar, + x AS x2 + FROM + bar + "); + } + + // Explicit qualified access to a *different* column of the shadowed input + // (e.g. `bar.x`) keeps resolving to the relation, while the bare name + // (`this.bar`) resolves to the shadowing column. Both can appear in the + // same `select` without one being dropped during SQL projection. + #[test] + fn test_column_shadows_relation_name_qualified_access() { + use insta::assert_snapshot; + + assert_snapshot!(crate::tests::compile( + "from bar | join foo (==id) | derive { bar = bar.a } | select { bar.x, this.bar }" + ).unwrap(), @" + SELECT + bar.x, + bar.a AS bar + FROM + bar + INNER JOIN foo ON bar.id = foo.id + "); + } + + // Wildcard expansion over a shadowed relation must not crash: `this.*` and + // `bar.*` need the relation *module*, which is preserved alongside the + // nested shadowing column. + #[test] + fn test_column_shadows_relation_name_wildcard() { + use insta::assert_snapshot; + + assert_snapshot!(crate::tests::compile( + "from bar | derive { bar = this.a } | select this.*" + ).unwrap(), @" + SELECT + * + FROM + bar + "); + + assert_snapshot!(crate::tests::compile( + "from bar | derive { bar = this.a } | select bar.*" + ).unwrap(), @" + SELECT + * + FROM + bar + "); + } + + // Pre-existing limitation (not introduced here, but easier to hit with + // shadowing): referencing `this.` *before* a column shadows that name + // in the same tuple resolves to the relation, which expands to a wildcard + // and drops the alias. Documented so the behavior is intentional rather than + // incidental. + #[test] + fn test_column_shadows_relation_name_forward_reference() { + use insta::assert_snapshot; + + // `x2 = this.bar` resolves to the relation `bar` (the shadowing column + // is only added later in the tuple), expanding to `*` and losing `x2`. + assert_snapshot!(crate::tests::compile( + "from bar | derive { x2 = this.bar, bar = this.a }" + ).unwrap(), @" + SELECT + *, + *, + a AS bar + FROM + bar + "); + } } diff --git a/prqlc/prqlc/src/semantic/resolver/expr.rs b/prqlc/prqlc/src/semantic/resolver/expr.rs index a2e7f5354ddb..c400c7db4150 100644 --- a/prqlc/prqlc/src/semantic/resolver/expr.rs +++ b/prqlc/prqlc/src/semantic/resolver/expr.rs @@ -5,7 +5,7 @@ use crate::ir::pl; use crate::ir::pl::PlFold; use crate::pr::{Ty, TyKind, TyTupleField}; use crate::semantic::resolver::{flatten, types, Resolver}; -use crate::semantic::{NS_INFER, NS_SELF, NS_THAT, NS_THIS}; +use crate::semantic::{NS_INFER, NS_SELF, NS_SHADOWING_COL, NS_THAT, NS_THIS}; use crate::utils::IdGenerator; use crate::Result; use crate::{Error, Reason, Span, WithErrorInfo}; @@ -94,6 +94,24 @@ impl pl::PlFold for Resolver<'_> { ..node }, + // A relation whose name is shadowed by a column: leaf + // resolution yields the bare relation ident, and the + // shadowing column is nested under `NS_SHADOWING_COL`. Unwrap + // to that column, keeping `fq_ident` for a clean emitted name. + DeclKind::Module(inner) + if matches!( + inner.names.get(NS_SHADOWING_COL).map(|d| &d.kind), + Some(DeclKind::Column(_)) + ) => + { + let target_id = *inner.names[NS_SHADOWING_COL].kind.as_column().unwrap(); + pl::Expr { + kind: pl::ExprKind::Ident(fq_ident), + target_id: Some(target_id), + ..node + } + } + DeclKind::TableDecl(_) => { let input_name = ident.name.clone(); @@ -290,6 +308,11 @@ impl Resolver<'_> { } for (name, decl) in module.names.iter().sorted_by_key(|(_, d)| d.order) { + // a column nested here only to shadow the relation's name is not + // part of the relation's own wildcard expansion + if name == NS_SHADOWING_COL { + continue; + } res.push(match &decl.kind { DeclKind::Module(submodule) => { let prefix = [prefix.to_vec(), vec![name]].concat(); diff --git a/prqlc/prqlc/src/semantic/resolver/names.rs b/prqlc/prqlc/src/semantic/resolver/names.rs index a1c3ddfbbf88..1447549e4263 100644 --- a/prqlc/prqlc/src/semantic/resolver/names.rs +++ b/prqlc/prqlc/src/semantic/resolver/names.rs @@ -6,7 +6,7 @@ use super::Resolver; use crate::ir::decl::{Decl, DeclKind, Module}; use crate::ir::pl::{Expr, ExprKind}; use crate::pr::Ident; -use crate::semantic::{NS_INFER, NS_INFER_MODULE, NS_SELF, NS_THAT, NS_THIS}; +use crate::semantic::{NS_INFER, NS_INFER_MODULE, NS_SELF, NS_SHADOWING_COL, NS_THAT, NS_THIS}; use crate::Error; use crate::Result; use crate::WithErrorInfo; @@ -72,7 +72,16 @@ impl Resolver<'_> { for (ident, decl) in this.as_decls().into_iter().sorted_by_key(|x| x.1.order) { if let DeclKind::Column(_) = decl.kind { - cols.push(ident); + // A column shadowing a relation's name is nested under + // `NS_SHADOWING_COL`; present it under the relation's own name + // rather than leaking the internal key. + if ident.name == NS_SHADOWING_COL { + if let Some(parent) = ident.pop() { + cols.push(parent); + } + } else { + cols.push(ident); + } } } cols diff --git a/prqlc/prqlc/src/sql/gen_projection.rs b/prqlc/prqlc/src/sql/gen_projection.rs index eba54e0c3d62..6eb99204fdea 100644 --- a/prqlc/prqlc/src/sql/gen_projection.rs +++ b/prqlc/prqlc/src/sql/gen_projection.rs @@ -125,10 +125,14 @@ fn deduplicate_select_items(items: &mut Vec) { let mut seen = HashSet::new(); items.retain(|select_item| match select_item { SelectItem::UnnamedExpr(sql_ast::Expr::CompoundIdentifier(idents)) => { - // If any of the identifiers hadn't been seen yet, retain the expr - idents.iter().any(|ident| seen.insert(ident.clone())) + // Key on the whole qualified path, so a table qualifier (e.g. `bar` + // in `bar.x`) is never conflated with a same-named column alias. + // Keying on `value` ignores `quote_style`, so items differing only + // in quoting dedup together (intended). + let key = idents.iter().map(|i| i.value.clone()).collect::>(); + seen.insert(key) } - SelectItem::ExprWithAlias { alias, .. } => seen.insert(alias.clone()), + SelectItem::ExprWithAlias { alias, .. } => seen.insert(vec![alias.value.clone()]), _ => true, }); }