Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@

**Fixes**:

- Allow a derived column to share its name with the source relation (e.g.
`from sales.sales | derive {sales = ...}`), which previously failed to resolve
the remaining columns.

**Documentation**:

**Web**:
Expand Down
7 changes: 7 additions & 0 deletions prqlc/prqlc/src/semantic/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,13 @@ pub const NS_INFER: &str = "_infer";
// implies we can infer new module declarations in the containing module
pub const NS_INFER_MODULE: &str = "_infer_module";

// when a column shadows a source relation's name (e.g.
// `from bar | derive {bar = ...}`), the shadowing column is nested inside the
// input namespace under this key. This keeps the input's inference template and
// qualified `bar.col` access intact, while leaf access to `bar` resolves to the
// shadowing column (see `Module::lookup` and the ident resolver).
pub const NS_SHADOWING_COL: &str = "_shadowing_col";

impl Stmt {
pub fn new(kind: StmtKind) -> Stmt {
Stmt {
Expand Down
202 changes: 198 additions & 4 deletions prqlc/prqlc/src/semantic/module.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use std::collections::{HashMap, HashSet};

use super::{
NS_DEFAULT_DB, NS_INFER, NS_INFER_MODULE, NS_MAIN, NS_PARAM, NS_QUERY_DEF, NS_SELF, NS_STD,
NS_THAT, NS_THIS,
NS_DEFAULT_DB, NS_INFER, NS_INFER_MODULE, NS_MAIN, NS_PARAM, NS_QUERY_DEF, NS_SELF,
NS_SHADOWING_COL, NS_STD, NS_THAT, NS_THIS,
};
use crate::ir::decl::{Decl, DeclKind, Module, RootModule, TableDecl, TableExpr};
use crate::ir::pl::{Annotation, Expr, Ident, Lineage, LineageColumn};
Expand Down Expand Up @@ -163,6 +163,19 @@ impl Module {
}
} else if let Some(decl) = module.names.get(&prefix) {
if let DeclKind::Module(inner) = &decl.kind {
// A column shadowing this relation's name (nested under
// `NS_SHADOWING_COL`) takes precedence for leaf access. We
// return the bare relation ident; the ident resolver then
// unwraps it to the nested column (keeping the emitted name
// clean). Qualified `prefix.col` access still recurses into
// the relation below. Gate on it being a `Column` to match
// the resolver's unwrap site, keeping the invariant explicit.
if matches!(
inner.names.get(NS_SHADOWING_COL).map(|d| &d.kind),
Some(DeclKind::Column(_))
) {
return HashSet::from([Ident::from_name(prefix)]);
}
if inner.names.contains_key(NS_SELF) {
return HashSet::from([Ident::from_path(vec![
prefix,
Expand Down Expand Up @@ -284,7 +297,7 @@ impl Module {
order: col_index + 1,
..Default::default()
};
ns.names.insert(name.name.clone(), decl);
insert_possibly_shadowing_col(ns, &name.name, decl);
}
_ => {}
}
Expand All @@ -301,7 +314,7 @@ impl Module {
let namespace = self.names.entry(namespace.to_string()).or_default();
let namespace = namespace.kind.as_module_mut().unwrap();

namespace.names.insert(name, DeclKind::Column(id).into());
insert_possibly_shadowing_col(namespace, &name, DeclKind::Column(id).into());
}

pub fn shadow(&mut self, ident: &str) {
Expand Down Expand Up @@ -469,6 +482,29 @@ impl RootModule {
}
}

/// Insert a column into `namespace`. If the column's name collides with a
/// source input namespace (e.g. `from bar | derive {bar = ...}`), nest it
/// inside that namespace under [`NS_SHADOWING_COL`] rather than overwriting it.
/// The input namespace holds the `NS_INFER` template and is the redirect target
/// used to infer every other column, so clobbering it would break resolution of
/// all sibling columns, and removing it would break qualified `bar.col` access.
/// Leaf access to `bar` resolves to the nested shadowing column (see
/// [`Module::lookup`] and the ident resolver).
fn insert_possibly_shadowing_col(namespace: &mut Module, name: &str, decl: Decl) {
if let Some(Decl {
kind: DeclKind::Module(input),
..
}) = namespace.names.get_mut(name)
{
// Last writer wins on a repeated shadow. This is safe because each
// pipeline step rebuilds the frame from lineage, so a single namespace
// never accumulates two shadows of the same name.
input.names.insert(NS_SHADOWING_COL.to_string(), decl);
} else {
namespace.names.insert(name.to_string(), decl);
}
}

pub fn ty_of_lineage(lineage: &Lineage) -> Ty {
Ty::relation(
lineage
Expand Down Expand Up @@ -526,4 +562,162 @@ mod tests {
module.unshadow("test_name");
assert_eq!(module.get(&ident).unwrap(), &decl);
}

// A column that shares its name with the source relation used to clobber
// the relation's input namespace, breaking inference of every other
// column. See the `insert_frame` collision handling above.
#[test]
fn test_column_shadows_relation_name() {
use insta::assert_snapshot;

// minimal repro: derived column `bar` shadows source `bar`
assert_snapshot!(crate::tests::compile(
"from bar | derive { bar = this.a } | select { this.x, this.bar }"
).unwrap(), @"
SELECT
x,
a AS bar
FROM
bar
");

// original source columns are still inferable after the collision
assert_snapshot!(crate::tests::compile(
"from bar | derive { bar = this.a } | select { this.a }"
).unwrap(), @r"
SELECT
a
FROM
bar
");

// collision against a relation alias (not the table name)
assert_snapshot!(crate::tests::compile(
"from b=bar | derive { b = this.a } | select { this.x, this.b }"
).unwrap(), @"
SELECT
x,
a AS b
FROM
bar AS b
");

// full real-world shape: group/aggregate/sort then a column named
// after the source relation
assert_snapshot!(crate::tests::compile(
"from sales.sales \
| group { this.category } ( aggregate { col1 = sum this.amount } ) \
| sort { this.category } \
| derive { sales = this.col1 } \
| select { this.category, this.sales }"
).unwrap(), @"
WITH table_0 AS (
SELECT
category,
COALESCE(SUM(amount), 0) AS _expr_0
FROM
sales.sales
GROUP BY
category
)
SELECT
category,
_expr_0 AS sales
FROM
table_0
ORDER BY
category
");
}

// References to *other* columns of the shadowed relation within the *same*
// tuple must still resolve: nesting the shadowing column inside the input
// namespace (rather than overwriting it) keeps the input's inference
// template available for siblings resolved later in the tuple.
#[test]
fn test_column_shadows_relation_name_intra_tuple() {
use insta::assert_snapshot;

// `this.x` should still resolve after `bar` shadows the source `bar`
// within the same `derive`.
assert_snapshot!(crate::tests::compile(
"from bar | derive { bar = this.a, x2 = this.x }"
).unwrap(), @r"
SELECT
*,
a AS bar,
x AS x2
FROM
bar
");
}

// Explicit qualified access to a *different* column of the shadowed input
// (e.g. `bar.x`) keeps resolving to the relation, while the bare name
// (`this.bar`) resolves to the shadowing column. Both can appear in the
// same `select` without one being dropped during SQL projection.
#[test]
fn test_column_shadows_relation_name_qualified_access() {
use insta::assert_snapshot;

assert_snapshot!(crate::tests::compile(
"from bar | join foo (==id) | derive { bar = bar.a } | select { bar.x, this.bar }"
).unwrap(), @"
SELECT
bar.x,
bar.a AS bar
FROM
bar
INNER JOIN foo ON bar.id = foo.id
");
}

// Wildcard expansion over a shadowed relation must not crash: `this.*` and
// `bar.*` need the relation *module*, which is preserved alongside the
// nested shadowing column.
#[test]
fn test_column_shadows_relation_name_wildcard() {
use insta::assert_snapshot;

assert_snapshot!(crate::tests::compile(
"from bar | derive { bar = this.a } | select this.*"
).unwrap(), @"
SELECT
*
FROM
bar
");

assert_snapshot!(crate::tests::compile(
"from bar | derive { bar = this.a } | select bar.*"
).unwrap(), @"
SELECT
*
FROM
bar
");
}

// Pre-existing limitation (not introduced here, but easier to hit with
// shadowing): referencing `this.<name>` *before* a column shadows that name
// in the same tuple resolves to the relation, which expands to a wildcard
// and drops the alias. Documented so the behavior is intentional rather than
// incidental.
#[test]
fn test_column_shadows_relation_name_forward_reference() {
use insta::assert_snapshot;

// `x2 = this.bar` resolves to the relation `bar` (the shadowing column
// is only added later in the tuple), expanding to `*` and losing `x2`.
assert_snapshot!(crate::tests::compile(
"from bar | derive { x2 = this.bar, bar = this.a }"

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure this makes sense. If I as a user wrote from bar | derive { x2 = this.bar } I would not expect the generated SQL to be SELECT *, * FROM BAR. I would be expecting that this.bar would be referring to a column inside the bar relation named bar, so the generated SQL ought to be SELECT *, bar AS x2 FROM bar. This is what's generated if the derive references any other name -- from bar | derive { x2 = this.baz } results in SELECT *, baz AS x2 FROM bar.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess maybe the broader question is, why does this.bar resolve to the parent relation bar? It seems that specifying this.anything should always resolve anything to only within the namespace of the current relation and never to the parent namespace. Although maybe I'm missing some currently established bit of PRQL functionality...

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Two things worth separating here, with the behavior verified against the compiler:

This is pre-existing — not introduced by this PR. On main (aab7438, before this branch) the same query already produces the *:

from bar | derive { x2 = this.bar }
→ SELECT *, * FROM bar

The PR documents it in test_column_shadows_relation_name_forward_reference rather than changing it, and it's orthogonal to what this PR actually fixes (a shadowing derived column like derive { bar = ... } previously broke resolution of all its sibling columns).

Why this.bar resolves to the relation: from bar binds the relation itself to the name bar in the current (this) namespace. this.bar looks bar up there, finds the relation, and expands it to *. A name that doesn't collide with the relation — this.baz — finds nothing under that name and is inferred as a column, giving baz AS x2. So the asymmetry you're seeing is the bare leaf colliding with the relation's own name in this.

To reach a column named bar today, the qualified form does the right thing:

from bar | derive { x2 = bar.bar }
→ SELECT *, bar AS x2 FROM bar

Your broader question — should this.X only ever resolve within the relation's columns and never to the relation name itself — is a real design question, but a wider change than this PR's scope. That call is the maintainers'; flagging that the answer to "am I missing established functionality?" is: yes, from <rel> putting <rel> into the this namespace is the current established behavior, and <rel>.<col> is the qualified escape hatch.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's no sense in adding a test to the test suite that encodes bad behavior -- that's why this question is relevant against this PR. I'm fine with this test if the behavior is intended but would suggest dropping the test if we'd like to fix this in the future.

That said, what design intent is met by adding the name of the current relation to the this namespace? Is there a reason why I would want to refer to this.bar and have that reflect a relation rather than a column?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question — I traced this through the resolver and verified each result against the compiler on this branch.

Short version: this isn't a flat bag of columns. Each input relation is registered in this as a named sub-namespace, keyed by the relation's name. That registration is exactly what makes qualified access work — bar.x, and after a join the disambiguation between bar.x and foo.y (you can even write this.bar.x). So bar lives in this for the same reason foo does after a join: it's an input. The bare leaf this.bar then collides with that input name, and an input resolves to its wildcard; bar.bar is the escape hatch to the column. So it isn't a parent-namespace leak — the relation is a first-class member of this, colliding with a column of the same name inside that one namespace.

On "encoding bad behavior": this PR actually moves toward the resolution you're describing. When a derived column realizes a name that collides with the relation, the column now wins:

from bar | derive { bar = this.a } | select { this.x, this.bar }
→ SELECT x, a AS bar FROM bar     # this.bar resolves to the column, not *

The residual that test_column_shadows_relation_name_forward_reference documents is narrower than the general case — it's a forward reference within the same tuple, where this.bar is read before the shadowing column is added:

from bar | derive { x2 = this.bar, bar = this.a }
→ SELECT *, *, a AS bar FROM bar  # x2 = this.bar still hits the relation

Whether that narrow residual is "bad behavior to drop the test for" or "intentional, with bar.bar as the documented escape hatch" is genuinely a maintainer design call, so I won't adjudicate it. The two readings are clear though: if the intent is that leaf this.<rel> should always prefer a column (your broader proposal), then the test does encode behavior you'd want to change, and dropping it or marking it a known-limitation TODO would signal that; if the wildcard expansion is accepted as-is, keeping the test as documentation of the edge is reasonable.

Mechanism + evidence
  • Input relations are inserted into this by name when a pipeline step resolves: resolver/functions.rs:290 (insert_frame(frame, NS_THIS)).
  • Leaf-name precedence is in module.rs lookup: a column nested under NS_SHADOWING_COL wins; otherwise the bare relation/input name is returned and expands to *. The shadowing column is nested rather than overwriting the input at module.rs:487-502.
  • Every SQL block above was produced by prqlc compile on this branch (b992209), not hand-derived.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kgutwin Thank you for your review :) The test was mostly added to flag existing behavior that I'll try to address later. If you want me to, I can delete the test, or put the correct expected output in it and skip it with an associated issue number ?

Regarding your question:

That said, what design intent is met by adding the name of the current relation to the this namespace? Is there a reason why I would want to refer to this.bar and have that reflect a relation rather than a column?

I believe this is a side effect of a bare this meaning * to have the count this resolve to COUNT(*): https://prql-lang.org/book/reference/syntax/keywords.html#this--that

I grepped through teh docs a little and this.bar referring to a "everything in the bar relation" is not documented so I'd be in favor of removing that behavior. We need to keep supporting count this though since it's in the docs.

Also, I'm not sure what a bare this should mean in other contetxts ? for example what should the behavior be for from bar | select this ? Could be SELECT * FROM bar, could be an error where we suggest using select this.* or explicit column names instead.

I'm happy to help on that but the design is not my decision to make, so I think the work on it should happen in another PR.

Please let me know what you think the behavior should be. I'd be happy over input from other maintainers as well 😄

).unwrap(), @"
SELECT
*,
*,
a AS bar
FROM
bar
");
}
}
25 changes: 24 additions & 1 deletion prqlc/prqlc/src/semantic/resolver/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use crate::ir::pl;
use crate::ir::pl::PlFold;
use crate::pr::{Ty, TyKind, TyTupleField};
use crate::semantic::resolver::{flatten, types, Resolver};
use crate::semantic::{NS_INFER, NS_SELF, NS_THAT, NS_THIS};
use crate::semantic::{NS_INFER, NS_SELF, NS_SHADOWING_COL, NS_THAT, NS_THIS};
use crate::utils::IdGenerator;
use crate::Result;
use crate::{Error, Reason, Span, WithErrorInfo};
Expand Down Expand Up @@ -94,6 +94,24 @@ impl pl::PlFold for Resolver<'_> {
..node
},

// A relation whose name is shadowed by a column: leaf
// resolution yields the bare relation ident, and the
// shadowing column is nested under `NS_SHADOWING_COL`. Unwrap
// to that column, keeping `fq_ident` for a clean emitted name.
DeclKind::Module(inner)
if matches!(
inner.names.get(NS_SHADOWING_COL).map(|d| &d.kind),
Some(DeclKind::Column(_))
) =>
{
let target_id = *inner.names[NS_SHADOWING_COL].kind.as_column().unwrap();
pl::Expr {
kind: pl::ExprKind::Ident(fq_ident),
target_id: Some(target_id),
..node
}
}

DeclKind::TableDecl(_) => {
let input_name = ident.name.clone();

Expand Down Expand Up @@ -290,6 +308,11 @@ impl Resolver<'_> {
}

for (name, decl) in module.names.iter().sorted_by_key(|(_, d)| d.order) {
// a column nested here only to shadow the relation's name is not
// part of the relation's own wildcard expansion
if name == NS_SHADOWING_COL {
continue;
}
res.push(match &decl.kind {
DeclKind::Module(submodule) => {
let prefix = [prefix.to_vec(), vec![name]].concat();
Expand Down
13 changes: 11 additions & 2 deletions prqlc/prqlc/src/semantic/resolver/names.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use super::Resolver;
use crate::ir::decl::{Decl, DeclKind, Module};
use crate::ir::pl::{Expr, ExprKind};
use crate::pr::Ident;
use crate::semantic::{NS_INFER, NS_INFER_MODULE, NS_SELF, NS_THAT, NS_THIS};
use crate::semantic::{NS_INFER, NS_INFER_MODULE, NS_SELF, NS_SHADOWING_COL, NS_THAT, NS_THIS};
use crate::Error;
use crate::Result;
use crate::WithErrorInfo;
Expand Down Expand Up @@ -72,7 +72,16 @@ impl Resolver<'_> {

for (ident, decl) in this.as_decls().into_iter().sorted_by_key(|x| x.1.order) {
if let DeclKind::Column(_) = decl.kind {
cols.push(ident);
// A column shadowing a relation's name is nested under
// `NS_SHADOWING_COL`; present it under the relation's own name
// rather than leaking the internal key.
if ident.name == NS_SHADOWING_COL {
if let Some(parent) = ident.pop() {
cols.push(parent);
}
} else {
cols.push(ident);
}
}
}
cols
Expand Down
10 changes: 7 additions & 3 deletions prqlc/prqlc/src/sql/gen_projection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,10 +125,14 @@ fn deduplicate_select_items(items: &mut Vec<SelectItem>) {
let mut seen = HashSet::new();
items.retain(|select_item| match select_item {
SelectItem::UnnamedExpr(sql_ast::Expr::CompoundIdentifier(idents)) => {
// If any of the identifiers hadn't been seen yet, retain the expr
idents.iter().any(|ident| seen.insert(ident.clone()))
// Key on the whole qualified path, so a table qualifier (e.g. `bar`
// in `bar.x`) is never conflated with a same-named column alias.
// Keying on `value` ignores `quote_style`, so items differing only
// in quoting dedup together (intended).
let key = idents.iter().map(|i| i.value.clone()).collect::<Vec<_>>();
seen.insert(key)
}
SelectItem::ExprWithAlias { alias, .. } => seen.insert(alias.clone()),
SelectItem::ExprWithAlias { alias, .. } => seen.insert(vec![alias.value.clone()]),
_ => true,
});
}
Expand Down
Loading