Skip to content

Commit 86f8976

Browse files
authored
feat: Add expression registry to native planner (#2851)
1 parent 81e19be commit 86f8976

File tree

10 files changed

+841
-210
lines changed

10 files changed

+841
-210
lines changed

docs/source/contributor-guide/adding_a_new_expression.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,8 @@ How this works is somewhat dependent on the type of expression you're adding. Ex
271271
If you're adding a new expression that requires custom protobuf serialization, you may need to:
272272

273273
1. Add a new message to the protobuf definition in `native/proto/src/proto/expr.proto`
274-
2. Update the Rust deserialization code to handle the new protobuf message type
274+
2. Add a native expression handler in `expression_registry.rs` to deserialize the new protobuf message type and
275+
create a native expression
275276

276277
For most expressions, you can skip this step if you're using the existing scalar function infrastructure.
277278

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! Arithmetic expression builders
19+
20+
/// Macro to generate arithmetic expression builders that need eval_mode handling
21+
#[macro_export]
22+
macro_rules! arithmetic_expr_builder {
23+
($builder_name:ident, $expr_type:ident, $operator:expr) => {
24+
pub struct $builder_name;
25+
26+
impl $crate::execution::planner::traits::ExpressionBuilder for $builder_name {
27+
fn build(
28+
&self,
29+
spark_expr: &datafusion_comet_proto::spark_expression::Expr,
30+
input_schema: arrow::datatypes::SchemaRef,
31+
planner: &$crate::execution::planner::PhysicalPlanner,
32+
) -> Result<
33+
std::sync::Arc<dyn datafusion::physical_expr::PhysicalExpr>,
34+
$crate::execution::operators::ExecutionError,
35+
> {
36+
let expr = $crate::extract_expr!(spark_expr, $expr_type);
37+
let eval_mode =
38+
$crate::execution::planner::from_protobuf_eval_mode(expr.eval_mode)?;
39+
planner.create_binary_expr(
40+
expr.left.as_ref().unwrap(),
41+
expr.right.as_ref().unwrap(),
42+
expr.return_type.as_ref(),
43+
$operator,
44+
input_schema,
45+
eval_mode,
46+
)
47+
}
48+
}
49+
};
50+
}
51+
52+
use std::sync::Arc;
53+
54+
use arrow::datatypes::SchemaRef;
55+
use datafusion::logical_expr::Operator as DataFusionOperator;
56+
use datafusion::physical_expr::PhysicalExpr;
57+
use datafusion_comet_proto::spark_expression::Expr;
58+
use datafusion_comet_spark_expr::{create_modulo_expr, create_negate_expr, EvalMode};
59+
60+
use crate::execution::{
61+
expressions::extract_expr,
62+
operators::ExecutionError,
63+
planner::{
64+
from_protobuf_eval_mode, traits::ExpressionBuilder, BinaryExprOptions, PhysicalPlanner,
65+
},
66+
};
67+
68+
/// Macro to define basic arithmetic builders that use eval_mode
69+
macro_rules! define_basic_arithmetic_builders {
70+
($(($builder:ident, $expr_type:ident, $op:expr)),* $(,)?) => {
71+
$(
72+
arithmetic_expr_builder!($builder, $expr_type, $op);
73+
)*
74+
};
75+
}
76+
77+
define_basic_arithmetic_builders![
78+
(AddBuilder, Add, DataFusionOperator::Plus),
79+
(SubtractBuilder, Subtract, DataFusionOperator::Minus),
80+
(MultiplyBuilder, Multiply, DataFusionOperator::Multiply),
81+
(DivideBuilder, Divide, DataFusionOperator::Divide),
82+
];
83+
84+
/// Builder for IntegralDivide expressions (requires special options)
85+
pub struct IntegralDivideBuilder;
86+
87+
impl ExpressionBuilder for IntegralDivideBuilder {
88+
fn build(
89+
&self,
90+
spark_expr: &Expr,
91+
input_schema: SchemaRef,
92+
planner: &PhysicalPlanner,
93+
) -> Result<Arc<dyn PhysicalExpr>, ExecutionError> {
94+
let expr = extract_expr!(spark_expr, IntegralDivide);
95+
let eval_mode = from_protobuf_eval_mode(expr.eval_mode)?;
96+
planner.create_binary_expr_with_options(
97+
expr.left.as_ref().unwrap(),
98+
expr.right.as_ref().unwrap(),
99+
expr.return_type.as_ref(),
100+
DataFusionOperator::Divide,
101+
input_schema,
102+
BinaryExprOptions {
103+
is_integral_div: true,
104+
},
105+
eval_mode,
106+
)
107+
}
108+
}
109+
110+
/// Builder for Remainder expressions (uses special modulo function)
111+
pub struct RemainderBuilder;
112+
113+
impl ExpressionBuilder for RemainderBuilder {
114+
fn build(
115+
&self,
116+
spark_expr: &Expr,
117+
input_schema: SchemaRef,
118+
planner: &PhysicalPlanner,
119+
) -> Result<Arc<dyn PhysicalExpr>, ExecutionError> {
120+
let expr = extract_expr!(spark_expr, Remainder);
121+
let eval_mode = from_protobuf_eval_mode(expr.eval_mode)?;
122+
let left = planner.create_expr(expr.left.as_ref().unwrap(), Arc::clone(&input_schema))?;
123+
let right = planner.create_expr(expr.right.as_ref().unwrap(), Arc::clone(&input_schema))?;
124+
125+
let result = create_modulo_expr(
126+
left,
127+
right,
128+
expr.return_type
129+
.as_ref()
130+
.map(crate::execution::serde::to_arrow_datatype)
131+
.unwrap(),
132+
input_schema,
133+
eval_mode == EvalMode::Ansi,
134+
&planner.session_ctx().state(),
135+
);
136+
result.map_err(|e| ExecutionError::GeneralError(e.to_string()))
137+
}
138+
}
139+
140+
/// Builder for UnaryMinus expressions (uses special negate function)
141+
pub struct UnaryMinusBuilder;
142+
143+
impl ExpressionBuilder for UnaryMinusBuilder {
144+
fn build(
145+
&self,
146+
spark_expr: &Expr,
147+
input_schema: SchemaRef,
148+
planner: &PhysicalPlanner,
149+
) -> Result<Arc<dyn PhysicalExpr>, ExecutionError> {
150+
let expr = extract_expr!(spark_expr, UnaryMinus);
151+
let child = planner.create_expr(expr.child.as_ref().unwrap(), input_schema)?;
152+
let result = create_negate_expr(child, expr.fail_on_error);
153+
result.map_err(|e| ExecutionError::GeneralError(e.to_string()))
154+
}
155+
}
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! Bitwise expression builders
19+
20+
use datafusion::logical_expr::Operator as DataFusionOperator;
21+
22+
use crate::binary_expr_builder;
23+
24+
/// Macro to define all bitwise builders at once
25+
macro_rules! define_bitwise_builders {
26+
($(($builder:ident, $expr_type:ident, $op:expr)),* $(,)?) => {
27+
$(
28+
binary_expr_builder!($builder, $expr_type, $op);
29+
)*
30+
};
31+
}
32+
33+
define_bitwise_builders![
34+
(
35+
BitwiseAndBuilder,
36+
BitwiseAnd,
37+
DataFusionOperator::BitwiseAnd
38+
),
39+
(BitwiseOrBuilder, BitwiseOr, DataFusionOperator::BitwiseOr),
40+
(
41+
BitwiseXorBuilder,
42+
BitwiseXor,
43+
DataFusionOperator::BitwiseXor
44+
),
45+
(
46+
BitwiseShiftLeftBuilder,
47+
BitwiseShiftLeft,
48+
DataFusionOperator::BitwiseShiftLeft
49+
),
50+
(
51+
BitwiseShiftRightBuilder,
52+
BitwiseShiftRight,
53+
DataFusionOperator::BitwiseShiftRight
54+
),
55+
];
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! Comparison expression builders
19+
20+
use datafusion::logical_expr::Operator as DataFusionOperator;
21+
22+
use crate::binary_expr_builder;
23+
24+
/// Macro to define all comparison builders at once
25+
macro_rules! define_comparison_builders {
26+
($(($builder:ident, $expr_type:ident, $op:expr)),* $(,)?) => {
27+
$(
28+
binary_expr_builder!($builder, $expr_type, $op);
29+
)*
30+
};
31+
}
32+
33+
define_comparison_builders![
34+
(EqBuilder, Eq, DataFusionOperator::Eq),
35+
(NeqBuilder, Neq, DataFusionOperator::NotEq),
36+
(LtBuilder, Lt, DataFusionOperator::Lt),
37+
(LtEqBuilder, LtEq, DataFusionOperator::LtEq),
38+
(GtBuilder, Gt, DataFusionOperator::Gt),
39+
(GtEqBuilder, GtEq, DataFusionOperator::GtEq),
40+
(
41+
EqNullSafeBuilder,
42+
EqNullSafe,
43+
DataFusionOperator::IsNotDistinctFrom
44+
),
45+
(
46+
NeqNullSafeBuilder,
47+
NeqNullSafe,
48+
DataFusionOperator::IsDistinctFrom
49+
),
50+
];
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! Logical expression builders
19+
20+
use datafusion::logical_expr::Operator as DataFusionOperator;
21+
use datafusion::physical_expr::expressions::NotExpr;
22+
23+
use crate::{binary_expr_builder, unary_expr_builder};
24+
25+
/// Macro to define all logical builders at once
26+
macro_rules! define_logical_builders {
27+
() => {
28+
binary_expr_builder!(AndBuilder, And, DataFusionOperator::And);
29+
binary_expr_builder!(OrBuilder, Or, DataFusionOperator::Or);
30+
unary_expr_builder!(NotBuilder, Not, NotExpr::new);
31+
};
32+
}
33+
34+
define_logical_builders!();

native/core/src/execution/expressions/mod.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,14 @@
1717

1818
//! Native DataFusion expressions
1919
20+
pub mod arithmetic;
21+
pub mod bitwise;
22+
pub mod comparison;
23+
pub mod logical;
24+
pub mod nullcheck;
2025
pub mod subquery;
2126

2227
pub use datafusion_comet_spark_expr::EvalMode;
28+
29+
// Re-export the extract_expr macro for convenience in expression builders
30+
pub use crate::extract_expr;
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! Null check expression builders
19+
20+
use datafusion::physical_expr::expressions::{IsNotNullExpr, IsNullExpr};
21+
22+
use crate::unary_expr_builder;
23+
24+
/// Macro to define all null check builders at once
25+
macro_rules! define_null_check_builders {
26+
() => {
27+
unary_expr_builder!(IsNullBuilder, IsNull, IsNullExpr::new);
28+
unary_expr_builder!(IsNotNullBuilder, IsNotNull, IsNotNullExpr::new);
29+
};
30+
}
31+
32+
define_null_check_builders!();

0 commit comments

Comments
 (0)