-
Notifications
You must be signed in to change notification settings - Fork 2.1k
feat(tesseract): Support SQL API grouped sub-query joins #11138
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,27 @@ | ||
| use super::member_expression::{MemberExpressionDefinition, NativeMemberExpressionDefinition}; | ||
| use cubenativeutils::wrappers::serializer::{ | ||
| NativeDeserialize, NativeDeserializer, NativeSerialize, | ||
| }; | ||
| use cubenativeutils::wrappers::{NativeContextHolder, NativeObjectHandle}; | ||
| use cubenativeutils::CubeError; | ||
| use serde::{Deserialize, Serialize}; | ||
| use std::any::Any; | ||
| use std::rc::Rc; | ||
|
|
||
| /// A query-level join against an opaque sub-query, originating from the | ||
| /// SQL API (cubesql) `subqueryJoins`. `sql` is a complete, pre-rendered | ||
| /// SELECT; `on` is the join condition expressed as a member expression | ||
| /// (same shape as a parsed member expression: `cubeName` + `expression`). | ||
| #[derive(Serialize, Deserialize, Debug, Clone, nativebridge::NativeBridgeStatic)] | ||
| pub struct SubqueryJoinStatic { | ||
| pub sql: String, | ||
| #[serde(rename = "joinType")] | ||
| pub join_type: Option<String>, | ||
| pub alias: String, | ||
| } | ||
|
|
||
| #[nativebridge::native_bridge(SubqueryJoinStatic, with_static_meta)] | ||
| pub trait SubqueryJoin { | ||
| #[nbridge(field)] | ||
| fn on(&self) -> Result<Rc<dyn MemberExpressionDefinition>, CubeError>; | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,7 @@ | ||
| use super::{DimensionSubqueryPlanner, JoinPlanner}; | ||
| use crate::logical_plan::*; | ||
| use crate::planner::collectors::collect_sub_query_dimensions_from_symbols; | ||
| use crate::planner::collectors::{collect_join_hints, collect_sub_query_dimensions_from_symbols}; | ||
| use crate::planner::join_hints::JoinHints; | ||
| use crate::planner::planners::multi_stage::PlanningScope; | ||
| use crate::planner::state::State; | ||
| use crate::planner::QueryProperties; | ||
|
|
@@ -77,12 +78,33 @@ impl SimpleQueryPlanner { | |
| )?; | ||
| let subquery_dimension_queries = | ||
| dimension_subquery_planner.plan_queries(&subquery_dimensions, scope)?; | ||
| // Query-level SQL-API sub-query joins (from `subqueryJoins`) extend the | ||
| // FROM clause with opaque joined sub-queries. | ||
| let subquery_joins = self.query_properties.subquery_joins(); | ||
| let source = if let Some(join) = &join { | ||
| self.join_planner | ||
| .make_join_logical_plan(join, subquery_dimension_queries) | ||
| } else if !subquery_joins.is_empty() { | ||
| // No selected member resolves the base cube, but the sub-query joins' | ||
| // ON conditions reference it (e.g. `SELECT t.col FROM Cube JOIN (...) t | ||
| // ON Cube.x = t.x`). Derive the join root from those ON references so | ||
| // the base cube — and the sub-query joins below — are emitted. | ||
| let mut hints = JoinHints::new(); | ||
| for subquery_join in subquery_joins { | ||
| for dep in subquery_join.on_sql.get_dependencies() { | ||
| hints.extend(&collect_join_hints(&dep)?); | ||
| } | ||
| } | ||
| self.join_planner | ||
| .make_join_logical_plan_with_join_hints(hints, subquery_dimension_queries)? | ||
| } else { | ||
| self.join_planner.make_empty_join_logical_plan() | ||
| }; | ||
| let source = if subquery_joins.is_empty() { | ||
| source | ||
| } else { | ||
| source.with_subquery_joins(subquery_joins.clone()) | ||
| }; | ||
| Ok(source) | ||
|
Comment on lines
+83
to
108
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Two minor smells in the assembly:
|
||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,6 +8,7 @@ | |
|
|
||
| use super::state::State; | ||
| use super::MemberSymbol; | ||
| use crate::logical_plan::LogicalSubqueryJoinItem; | ||
| use crate::planner::collectors::{collect_multiplied_measures, has_multi_stage_members}; | ||
| use crate::planner::filter::tree_ops; | ||
| use crate::planner::filter::{Filter, FilterGroup, FilterItem, FilterOperator}; | ||
|
|
@@ -175,6 +176,10 @@ pub struct QueryProperties { | |
| disable_external_pre_aggregations: bool, | ||
| #[builder(default)] | ||
| pre_aggregation_id: Option<String>, | ||
| /// Query-level joins against opaque sub-queries, from the SQL API | ||
| /// `subqueryJoins`. Folded into the query's `LogicalJoin` source. | ||
| #[builder(default)] | ||
| subquery_joins: Vec<LogicalSubqueryJoinItem>, | ||
| #[builder(setter(skip), default)] | ||
| multi_fact_join_groups: OnceCell<MultiFactJoinGroups>, | ||
| } | ||
|
|
@@ -206,6 +211,10 @@ impl QueryProperties { | |
| self.allow_multi_stage | ||
| } | ||
|
|
||
| pub fn subquery_joins(&self) -> &Vec<LogicalSubqueryJoinItem> { | ||
| &self.subquery_joins | ||
| } | ||
|
|
||
| // Push every entry of `dimensions_filters` into matching `case` | ||
| // expressions on each member, filter and order item. Run once at | ||
| // construction; mutators do not re-apply it. | ||
|
|
@@ -1136,6 +1145,7 @@ impl PartialEq for QueryProperties { | |
| disable_external_pre_aggregations, | ||
| pre_aggregation_id, | ||
| query_join_hints, | ||
| subquery_joins, | ||
| // Not part of semantic equality: | ||
| query_tools: _, | ||
| multi_fact_join_groups: _, | ||
|
|
@@ -1160,5 +1170,14 @@ impl PartialEq for QueryProperties { | |
| && *disable_external_pre_aggregations == other.disable_external_pre_aggregations | ||
| && *pre_aggregation_id == other.pre_aggregation_id | ||
| && *query_join_hints == other.query_join_hints | ||
| // Compare sub-query joins by their request-derived identity (the | ||
| // compiled `on_sql` is derived deterministically from these). | ||
| && subquery_joins.len() == other.subquery_joins.len() | ||
| && subquery_joins | ||
| .iter() | ||
| .zip(other.subquery_joins.iter()) | ||
| .all(|(a, b)| { | ||
| a.sql == b.sql && a.alias == b.alias && a.join_type == b.join_type | ||
| }) | ||
| } | ||
|
Comment on lines
+1173
to
1182
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
In practice
|
||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Alias contract is trust-based.
aliasis consumed verbatim (no quoting) and the innersqlis emitted verbatim into the outer query. The PR description states the SQL API sends a pre-quoted alias and a controlled inner SELECT, which is fine for the cubesql producer, but the trait itself doesn't enforce or document the invariant. If any other caller ofbuildSqlAndParamsRustever populatessubqueryJoins(the JS BaseQuery now forwardsthis.options.subqueryJoinsfrom any caller), an unquoted/attacker-controlled alias would land in the FROM clause untouched.Consider tightening the doc-comment on this struct to state explicitly that:
aliasMUST be a fully-quoted identifier produced by the caller, andsqlis emitted verbatim and MUST come from a trusted, planner-side path.Optionally validate at compile time that
aliasstarts/ends with the quote character ("or the dialect-specific one). Worth at least a// SAFETY: …-style note here so the next reader doesn't reintroduce quoting at a different layer.