Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -626,6 +626,7 @@ fn invoke_base_query_options<IT: InnerTypes>(b: &NativeBaseQueryOptions<IT>) ->
r.record("join_graph", b.join_graph());
r.record("security_context", b.security_context());
r.record("join_hints", b.join_hints());
r.record("subquery_joins", b.subquery_joins());
r
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ const BRIDGES: BridgeSpec[] = [
'row_limit',
'security_context',
'segments',
'subquery_joins',
'time_dimensions',
'timezone',
'total_query',
Expand Down
2 changes: 2 additions & 0 deletions packages/cubejs-schema-compiler/src/adapter/BaseQuery.js
Original file line number Diff line number Diff line change
Expand Up @@ -962,6 +962,7 @@ export class BaseQuery {
convertTzForRawTimeDimension: !!this.options.convertTzForRawTimeDimension,
maskedMembers: this.options.maskedMembers,
memberToAlias: this.options.memberToAlias,
subqueryJoins: this.options.subqueryJoins,
};

try {
Expand Down Expand Up @@ -1011,6 +1012,7 @@ export class BaseQuery {
securityContext: this.contextSymbols.securityContext,
cubestoreSupportMultistage: this.options.cubestoreSupportMultistage ?? getEnv('cubeStoreRollingWindowJoin'),
disableExternalPreAggregations: !!this.options.disableExternalPreAggregations,
subqueryJoins: this.options.subqueryJoins,
};

const buildResult = nativeBuildSqlAndParams(queryParams);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use super::join_graph::{JoinGraph, NativeJoinGraph};
use super::join_hints::JoinHintItem;
use super::options_member::OptionsMember;
use super::security_context::{NativeSecurityContext, SecurityContext};
use super::subquery_join::{NativeSubqueryJoin, SubqueryJoin};
use crate::cube_bridge::base_tools::{BaseTools, NativeBaseTools};
use crate::cube_bridge::evaluator::{CubeEvaluator, NativeCubeEvaluator};
use cubenativeutils::wrappers::serializer::{
Expand Down Expand Up @@ -106,4 +107,6 @@ pub trait BaseQueryOptions {
fn security_context(&self) -> Result<Rc<dyn SecurityContext>, CubeError>;
#[nbridge(field, optional, vec)]
fn join_hints(&self) -> Result<Option<Vec<JoinHintItem>>, CubeError>;
#[nbridge(field, optional, vec)]
fn subquery_joins(&self) -> Result<Option<Vec<Rc<dyn SubqueryJoin>>>, CubeError>;
}
Original file line number Diff line number Diff line change
Expand Up @@ -45,5 +45,6 @@ pub mod sql_templates_render;
pub mod sql_utils;
pub mod string_or_sql;
pub mod struct_with_sql_member;
pub mod subquery_join;
pub mod timeshift_definition;
pub mod view_filter_definition;
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
use super::member_expression::{MemberExpressionDefinition, NativeMemberExpressionDefinition};
use cubenativeutils::wrappers::serializer::{
NativeDeserialize, NativeDeserializer, NativeSerialize,
};
use cubenativeutils::wrappers::{NativeContextHolder, NativeObjectHandle};
use cubenativeutils::CubeError;
use serde::{Deserialize, Serialize};
use std::any::Any;
use std::rc::Rc;

/// A query-level join against an opaque sub-query, originating from the
/// SQL API (cubesql) `subqueryJoins`. `sql` is a complete, pre-rendered
/// SELECT; `on` is the join condition expressed as a member expression
/// (same shape as a parsed member expression: `cubeName` + `expression`).
#[derive(Serialize, Deserialize, Debug, Clone, nativebridge::NativeBridgeStatic)]
pub struct SubqueryJoinStatic {
pub sql: String,
#[serde(rename = "joinType")]
pub join_type: Option<String>,
pub alias: String,
}
Comment on lines +17 to +21

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alias contract is trust-based. alias is consumed verbatim (no quoting) and the inner sql is emitted verbatim into the outer query. The PR description states the SQL API sends a pre-quoted alias and a controlled inner SELECT, which is fine for the cubesql producer, but the trait itself doesn't enforce or document the invariant. If any other caller of buildSqlAndParamsRust ever populates subqueryJoins (the JS BaseQuery now forwards this.options.subqueryJoins from any caller), an unquoted/attacker-controlled alias would land in the FROM clause untouched.

Consider tightening the doc-comment on this struct to state explicitly that:

  1. alias MUST be a fully-quoted identifier produced by the caller, and
  2. sql is emitted verbatim and MUST come from a trusted, planner-side path.

Optionally validate at compile time that alias starts/ends with the quote character (" or the dialect-specific one). Worth at least a // SAFETY: …-style note here so the next reader doesn't reintroduce quoting at a different layer.


#[nativebridge::native_bridge(SubqueryJoinStatic, with_static_meta)]
pub trait SubqueryJoin {
#[nbridge(field)]
fn on(&self) -> Result<Rc<dyn MemberExpressionDefinition>, CubeError>;
}
51 changes: 51 additions & 0 deletions rust/cube/cubesqlplanner/cubesqlplanner/src/logical_plan/join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,29 @@ impl PrettyPrint for LogicalJoinItem {
}
}

/// A query-level join against an opaque pre-rendered sub-query, sourced
/// from the SQL API `subqueryJoins`. `sql` is a complete SELECT emitted
/// verbatim; `on_sql` is the compiled join condition (it references the
/// owning cube and the sub-query `alias` as a literal). Unlike
/// `LogicalJoinItem` there is no joined `Cube` node, so these are carried
/// as opaque data and never participate in input packing.
#[derive(Clone)]
pub struct LogicalSubqueryJoinItem {
pub sql: String,
pub alias: String,
pub join_type: String,
pub on_sql: Rc<SqlCall>,
}

impl PrettyPrint for LogicalSubqueryJoinItem {
fn pretty_print(&self, result: &mut PrettyPrintResult, state: &PrettyPrintState) {
result.println(
&format!("SubqueryJoinItem: {} AS {}", self.join_type, self.alias),
state,
);
}
}

/// Join of cubes that backs a query source: a `root` cube plus
/// non-root cubes (`joins`), optionally extended by sub-query
/// dimensions that contribute their own joined-in CTEs.
Expand All @@ -42,6 +65,8 @@ pub struct LogicalJoin {
joins: Vec<LogicalJoinItem>,
#[builder(default)]
dimension_subqueries: Vec<Rc<DimensionSubQuery>>,
#[builder(default)]
subquery_joins: Vec<LogicalSubqueryJoinItem>,
}

impl LogicalJoin {
Expand All @@ -56,6 +81,22 @@ impl LogicalJoin {
pub fn dimension_subqueries(&self) -> &Vec<Rc<DimensionSubQuery>> {
&self.dimension_subqueries
}

pub fn subquery_joins(&self) -> &Vec<LogicalSubqueryJoinItem> {
&self.subquery_joins
}

/// Returns a copy of this join with the given query-level sub-query
/// joins attached. Used to fold in SQL-API `subqueryJoins` after the
/// cube join tree has been built.
pub fn with_subquery_joins(&self, subquery_joins: Vec<LogicalSubqueryJoinItem>) -> Rc<Self> {
Rc::new(Self {
root: self.root.clone(),
joins: self.joins.clone(),
dimension_subqueries: self.dimension_subqueries.clone(),
subquery_joins,
})
}
}

impl LogicalNode for LogicalJoin {
Expand Down Expand Up @@ -101,6 +142,9 @@ impl LogicalNode for LogicalJoin {
.map(|itm| itm.clone().into_logical_node())
.collect::<Result<Vec<_>, _>>()?,
)
// Sub-query joins are opaque data (no child plan nodes), so they are
// not packed/unpacked as inputs; clone them through unchanged.
.subquery_joins(self.subquery_joins.clone())
.build();

Ok(Rc::new(result))
Expand Down Expand Up @@ -191,6 +235,13 @@ impl PrettyPrint for LogicalJoin {
subquery.pretty_print(result, &details_state);
}
}
if !self.subquery_joins().is_empty() {
result.println("subquery_joins:", &state);
let details_state = state.new_level();
for subquery_join in self.subquery_joins().iter() {
subquery_join.pretty_print(result, &details_state);
}
}
} else {
result.println(&format!("Empty source"), state);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ pub enum SingleSource {
Subquery(Rc<QueryPlan>),
Cube(Rc<BaseCube>),
TableReference(String, Rc<Schema>),
/// An opaque, pre-rendered SQL sub-query (a complete SELECT) emitted
/// verbatim wrapped in parentheses. Used for SQL-API `subqueryJoins`,
/// whose body is built outside the planner.
RawSubquerySql(String),
}

impl SingleSource {
Expand All @@ -25,6 +29,7 @@ impl SingleSource {
}
SingleSource::Subquery(s) => format!("({})", s.to_sql(templates)?),
SingleSource::TableReference(r, _) => format!(" {} ", r),
SingleSource::RawSubquerySql(sql) => format!("({})", sql),
};
Ok(sql)
}
Expand All @@ -34,6 +39,7 @@ impl SingleSource {
SingleSource::Subquery(subquery) => subquery.schema(),
SingleSource::Cube(_) => Rc::new(Schema::empty()),
SingleSource::TableReference(_, schema) => schema.clone(),
SingleSource::RawSubquerySql(_) => Rc::new(Schema::empty()),
}
}
}
Expand Down Expand Up @@ -91,7 +97,13 @@ impl SingleAliasedSource {
}
} */

templates.query_aliased(&sql, &self.alias)
match &self.source {
// The alias of a SQL-API sub-query join is already a final, quoted
// identifier and is referenced verbatim in the join ON condition.
// Re-quoting it would double the quotes and break the reference.
SingleSource::RawSubquerySql(_) => templates.query_aliased_prequoted(&sql, &self.alias),
_ => templates.query_aliased(&sql, &self.alias),
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,9 @@ impl ReferencesBuilder {
}
SingleSource::Cube(_) => None,
SingleSource::TableReference(_, schema) => schema.resolve_member_reference(member),
// Opaque pre-rendered SQL: members are referenced via the alias as
// literals in the ON/projection SQL, not resolved against a schema.
SingleSource::RawSubquerySql(_) => None,
};
column_name.map(|col| QualifiedColumnName::new(Some(source.alias.clone()), col))
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use super::super::{LogicalNodeProcessor, ProcessableNode, PushDownBuilderContext};
use crate::logical_plan::LogicalJoin;
use crate::physical_plan::{From, JoinBuilder, JoinCondition};
use crate::physical_plan::{From, JoinBuilder, JoinCondition, SingleSource};
use crate::physical_plan_builder::PhysicalPlanBuilder;
use crate::planner::SqlJoinCondition;
use cubenativeutils::CubeError;
Expand Down Expand Up @@ -38,6 +38,7 @@ impl<'a> LogicalNodeProcessor<'a, LogicalJoin> for LogicalJoinProcessor<'a> {
let root = logical_join.root().clone().unwrap().cube().clone();
if logical_join.joins().is_empty()
&& logical_join.dimension_subqueries().is_empty()
&& logical_join.subquery_joins().is_empty()
&& multi_stage_dimension.is_none()
{
Ok(From::new_from_cube(
Expand Down Expand Up @@ -90,6 +91,22 @@ impl<'a> LogicalNodeProcessor<'a, LogicalJoin> for LogicalJoinProcessor<'a> {
&context,
)?;
}
for subquery_join in logical_join.subquery_joins().iter() {
let source = SingleSource::RawSubquerySql(subquery_join.sql.clone());
let on = JoinCondition::new_base_join(SqlJoinCondition::try_new(
subquery_join.on_sql.clone(),
)?);
if subquery_join.join_type.eq_ignore_ascii_case("INNER") {
join_builder.inner_join_source(source, subquery_join.alias.clone(), on);
} else if subquery_join.join_type.eq_ignore_ascii_case("LEFT") {
join_builder.left_join_source(source, subquery_join.alias.clone(), on);
} else {
return Err(CubeError::user(format!(
"Unsupported join type '{}' for sub-query join, expected INNER or LEFT",
subquery_join.join_type
)));
}
Comment thread
claude[bot] marked this conversation as resolved.
}
Ok(From::new_from_join(join_builder.build()))
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use super::{DimensionSubqueryPlanner, JoinPlanner};
use crate::logical_plan::*;
use crate::planner::collectors::collect_sub_query_dimensions_from_symbols;
use crate::planner::collectors::{collect_join_hints, collect_sub_query_dimensions_from_symbols};
use crate::planner::join_hints::JoinHints;
use crate::planner::planners::multi_stage::PlanningScope;
use crate::planner::state::State;
use crate::planner::QueryProperties;
Expand Down Expand Up @@ -77,12 +78,33 @@ impl SimpleQueryPlanner {
)?;
let subquery_dimension_queries =
dimension_subquery_planner.plan_queries(&subquery_dimensions, scope)?;
// Query-level SQL-API sub-query joins (from `subqueryJoins`) extend the
// FROM clause with opaque joined sub-queries.
let subquery_joins = self.query_properties.subquery_joins();
let source = if let Some(join) = &join {
self.join_planner
.make_join_logical_plan(join, subquery_dimension_queries)
} else if !subquery_joins.is_empty() {
// No selected member resolves the base cube, but the sub-query joins'
// ON conditions reference it (e.g. `SELECT t.col FROM Cube JOIN (...) t
// ON Cube.x = t.x`). Derive the join root from those ON references so
// the base cube — and the sub-query joins below — are emitted.
let mut hints = JoinHints::new();
for subquery_join in subquery_joins {
for dep in subquery_join.on_sql.get_dependencies() {
hints.extend(&collect_join_hints(&dep)?);
}
}
self.join_planner
.make_join_logical_plan_with_join_hints(hints, subquery_dimension_queries)?
} else {
self.join_planner.make_empty_join_logical_plan()
};
let source = if subquery_joins.is_empty() {
source
} else {
source.with_subquery_joins(subquery_joins.clone())
};
Ok(source)
Comment on lines +83 to 108

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Two minor smells in the assembly:

  1. subquery_joins.clone() is called even though with_subquery_joins already takes ownership of a Vec. Since subquery_joins here is a &Vec, the clone is needed — but the resulting source then carries a copy of items that the QueryProperties already owns. Not a correctness issue, just a small allocation per query.

  2. When join.is_none() and subquery_joins.is_empty() you fall through to make_empty_join_logical_plan(). If someone sends a subqueryJoins with an empty ON dependency set (degenerate but possible), hints ends up empty and make_join_logical_plan_with_join_hints(empty_hints, …) is called — worth a quick check that that path doesn't blow up. Adding a test for "subqueryJoins with no cube reference in ON" would close that gap.

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

use super::state::State;
use super::MemberSymbol;
use crate::logical_plan::LogicalSubqueryJoinItem;
use crate::planner::collectors::{collect_multiplied_measures, has_multi_stage_members};
use crate::planner::filter::tree_ops;
use crate::planner::filter::{Filter, FilterGroup, FilterItem, FilterOperator};
Expand Down Expand Up @@ -175,6 +176,10 @@ pub struct QueryProperties {
disable_external_pre_aggregations: bool,
#[builder(default)]
pre_aggregation_id: Option<String>,
/// Query-level joins against opaque sub-queries, from the SQL API
/// `subqueryJoins`. Folded into the query's `LogicalJoin` source.
#[builder(default)]
subquery_joins: Vec<LogicalSubqueryJoinItem>,
#[builder(setter(skip), default)]
multi_fact_join_groups: OnceCell<MultiFactJoinGroups>,
}
Expand Down Expand Up @@ -206,6 +211,10 @@ impl QueryProperties {
self.allow_multi_stage
}

pub fn subquery_joins(&self) -> &Vec<LogicalSubqueryJoinItem> {
&self.subquery_joins
}

// Push every entry of `dimensions_filters` into matching `case`
// expressions on each member, filter and order item. Run once at
// construction; mutators do not re-apply it.
Expand Down Expand Up @@ -1136,6 +1145,7 @@ impl PartialEq for QueryProperties {
disable_external_pre_aggregations,
pre_aggregation_id,
query_join_hints,
subquery_joins,
// Not part of semantic equality:
query_tools: _,
multi_fact_join_groups: _,
Expand All @@ -1160,5 +1170,14 @@ impl PartialEq for QueryProperties {
&& *disable_external_pre_aggregations == other.disable_external_pre_aggregations
&& *pre_aggregation_id == other.pre_aggregation_id
&& *query_join_hints == other.query_join_hints
// Compare sub-query joins by their request-derived identity (the
// compiled `on_sql` is derived deterministically from these).
&& subquery_joins.len() == other.subquery_joins.len()
&& subquery_joins
.iter()
.zip(other.subquery_joins.iter())
.all(|(a, b)| {
a.sql == b.sql && a.alias == b.alias && a.join_type == b.join_type
})
}
Comment on lines +1173 to 1182

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PartialEq ignores on_sql. The justification (the compiled SqlCall is derived deterministically from sql/alias/join_type) isn't quite right — on_sql is compiled from the on member expression, not from the three fields compared here. Two subqueryJoins entries can share (sql, alias, join_type) while having different on conditions and still compare equal.

In practice QueryProperties::eq seems to be used for plan-caching/equivalence — a false positive here could let one query reuse another's cached plan. Either:

  • include on_sql in the comparison (e.g. via SqlCall's structural eq if available), or
  • explicitly call this out as "identity, not semantic equality" in the doc.

}
Loading
Loading