Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 36 additions & 3 deletions rust/lance-graph/src/datafusion_planner/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

use crate::ast::{BooleanExpression, PropertyValue, ValueExpression};
use datafusion::logical_expr::{col, lit, BinaryExpr, Expr, Operator};
use datafusion_functions_aggregate::average::avg;
use datafusion_functions_aggregate::count::count;
use datafusion_functions_aggregate::sum::sum;

Expand Down Expand Up @@ -87,14 +88,18 @@ pub(crate) fn to_df_value_expr(expr: &ValueExpression) -> Expr {
match name.to_lowercase().as_str() {
"count" => {
if args.len() == 1 {
// Check for COUNT(*)
let arg_expr = if let VE::Variable(v) = &args[0] {
if v == "*" {
// COUNT(*) - count all rows including NULLs
lit(1)
} else {
to_df_value_expr(&args[0])
// COUNT(p) - count non-NULL rows by using a representative column
// Use <variable>__id as a null-sensitive column
// This ensures optional matches with NULL variables are not counted
col(format!("{}__id", v))
}
} else {
// COUNT(p.property) - count non-null values of that property
to_df_value_expr(&args[0])
};

Expand All @@ -107,14 +112,23 @@ pub(crate) fn to_df_value_expr(expr: &ValueExpression) -> Expr {
}
"sum" => {
if args.len() == 1 {
// Note: SUM(variable) is rejected by semantic validation
// So we only handle valid cases here
let arg_expr = to_df_value_expr(&args[0]);
// Use DataFusion's sum helper function
sum(arg_expr)
} else {
// Invalid argument count - return placeholder
lit(0)
}
}
"avg" => {
if args.len() == 1 {
let arg_expr = to_df_value_expr(&args[0]);
avg(arg_expr)
} else {
lit(0)
}
}
_ => {
// Unsupported function - return placeholder for now
lit(0)
Expand Down Expand Up @@ -520,6 +534,25 @@ mod tests {
assert!(s.contains("p__amount"), "Should contain column reference");
}

#[test]
fn test_value_expr_function_avg() {
    // AVG(p.amount), spelled in upper case on purpose.
    let amount = ValueExpression::Property(PropertyRef {
        variable: "p".into(),
        property: "amount".into(),
    });
    let avg_call = ValueExpression::Function {
        name: "AVG".into(),
        args: vec![amount],
    };

    // Inspect the Debug rendering of the planned expression rather than its structure.
    let rendered = format!("{:?}", to_df_value_expr(&avg_call));

    // Accept either capitalisation of the aggregate's name in the rendering.
    let mentions_avg = ["avg", "Avg"]
        .iter()
        .any(|needle| rendered.contains(*needle));
    assert!(mentions_avg, "Should be AVG function");
    assert!(
        rendered.contains("p__amount"),
        "Should contain column reference"
    );
}

// ========================================================================
// Unit tests for contains_aggregate()
// ========================================================================
Expand Down
144 changes: 144 additions & 0 deletions rust/lance-graph/src/semantic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,20 @@ impl SemanticAnalyzer {
location: snafu::Location::new(file!(), line!(), column!()),
});
}

// Additional validation for SUM, AVG, MIN, MAX: they require properties, not bare variables
// Only COUNT allows bare variables (COUNT(*) or COUNT(p))
if matches!(name.to_lowercase().as_str(), "sum" | "avg" | "min" | "max") {
if let Some(ValueExpression::Variable(v)) = args.first() {
return Err(GraphError::PlanError {
message: format!(
"{}({}) is invalid - {} requires a property like {}({}.property). You cannot {} a node/entity.",
name.to_uppercase(), v, name.to_uppercase(), name.to_uppercase(), v, name.to_lowercase()
),
location: snafu::Location::new(file!(), line!(), column!()),
});
}
}
}
_ => {
// Other functions - no validation yet
Expand Down Expand Up @@ -957,6 +971,136 @@ mod tests {
);
}

#[test]
fn test_sum_with_variable_fails_validation() {
    // RETURN sum(n): the argument is a bare variable, not a property.
    let bare_variable = ValueExpression::Variable(String::from("n"));
    let sum_call = ValueExpression::Function {
        name: String::from("sum"),
        args: vec![bare_variable],
    };

    let analysis = analyze_return_with_match("n", "Person", sum_call).unwrap();

    assert!(
        !analysis.errors.is_empty(),
        "Expected SUM(variable) to produce validation errors"
    );

    // Both fragments have to appear within the same error message.
    let reported = analysis
        .errors
        .iter()
        .filter(|msg| msg.contains("SUM(n) is invalid"))
        .any(|msg| msg.contains("requires a property"));
    assert!(
        reported,
        "Expected error about SUM requiring property, got: {:?}",
        analysis.errors
    );
}

#[test]
fn test_avg_with_variable_fails_validation() {
    // RETURN avg(n): the argument is a bare variable, not a property.
    let avg_call = ValueExpression::Function {
        name: String::from("avg"),
        args: vec![ValueExpression::Variable(String::from("n"))],
    };

    let analysis = analyze_return_with_match("n", "Person", avg_call).unwrap();

    assert!(
        !analysis.errors.is_empty(),
        "Expected AVG(variable) to produce validation errors"
    );

    // Look for a single message carrying both the call text and the explanation.
    let matching_error = analysis
        .errors
        .iter()
        .find(|msg| msg.contains("AVG(n) is invalid") && msg.contains("requires a property"));
    assert!(
        matching_error.is_some(),
        "Expected error about AVG requiring property, got: {:?}",
        analysis.errors
    );
}

#[test]
fn test_sum_with_property_passes_validation() {
    // RETURN sum(n.age): a property argument should produce no errors.
    let age = ValueExpression::Property(PropertyRef::new("n", "age"));
    let sum_call = ValueExpression::Function {
        name: String::from("sum"),
        args: vec![age],
    };

    let analysis = analyze_return_with_match("n", "Person", sum_call).unwrap();

    assert!(
        analysis.errors.is_empty(),
        "SUM with property should pass validation, got errors: {:?}",
        analysis.errors
    );
}

#[test]
fn test_min_with_variable_fails_validation() {
    // RETURN min(n): the argument is a bare variable, not a property.
    let bare_variable = ValueExpression::Variable(String::from("n"));
    let min_call = ValueExpression::Function {
        name: String::from("min"),
        args: vec![bare_variable],
    };

    let analysis = analyze_return_with_match("n", "Person", min_call).unwrap();

    assert!(
        !analysis.errors.is_empty(),
        "Expected MIN(variable) to produce validation errors"
    );

    // Count the messages that contain both expected fragments; at least one is required.
    let hits = analysis
        .errors
        .iter()
        .filter(|msg| msg.contains("MIN(n) is invalid") && msg.contains("requires a property"))
        .count();
    assert!(
        hits > 0,
        "Expected error about MIN requiring property, got: {:?}",
        analysis.errors
    );
}

#[test]
fn test_max_with_variable_fails_validation() {
    // RETURN max(n): the argument is a bare variable, not a property.
    let max_call = ValueExpression::Function {
        name: String::from("max"),
        args: vec![ValueExpression::Variable(String::from("n"))],
    };

    let analysis = analyze_return_with_match("n", "Person", max_call).unwrap();

    assert!(
        !analysis.errors.is_empty(),
        "Expected MAX(variable) to produce validation errors"
    );

    // Locate the first message, if any, that contains both expected fragments.
    let position = analysis
        .errors
        .iter()
        .position(|msg| msg.contains("MAX(n) is invalid") && msg.contains("requires a property"));
    assert!(
        position.is_some(),
        "Expected error about MAX requiring property, got: {:?}",
        analysis.errors
    );
}

#[test]
fn test_min_with_property_passes_validation() {
    // RETURN min(n.age): a property argument should produce no errors.
    let age = ValueExpression::Property(PropertyRef::new("n", "age"));
    let min_call = ValueExpression::Function {
        name: String::from("min"),
        args: vec![age],
    };

    let analysis = analyze_return_with_match("n", "Person", min_call).unwrap();

    assert!(
        analysis.errors.is_empty(),
        "MIN with property should pass validation, got errors: {:?}",
        analysis.errors
    );
}

#[test]
fn test_max_with_property_passes_validation() {
    // RETURN max(n.age): a property argument should produce no errors.
    let age = ValueExpression::Property(PropertyRef::new("n", "age"));
    let max_call = ValueExpression::Function {
        name: String::from("max"),
        args: vec![age],
    };

    let analysis = analyze_return_with_match("n", "Person", max_call).unwrap();

    assert!(
        analysis.errors.is_empty(),
        "MAX with property should pass validation, got errors: {:?}",
        analysis.errors
    );
}

#[test]
fn test_arithmetic_with_non_numeric_literal_error() {
// RETURN "x" + 1
Expand Down
Loading
Loading