From 6a5f5dd259c32cf745034ec883591feca2ec4fea Mon Sep 17 00:00:00 2001
From: leiyuou
Date: Wed, 19 Nov 2025 20:21:51 -0800
Subject: [PATCH 1/2] add

---
 .../src/datafusion_planner/expression.rs      |   2 +
 .../tests/test_datafusion_pipeline.rs         | 139 ++++++++++++++++++
 2 files changed, 141 insertions(+)

diff --git a/rust/lance-graph/src/datafusion_planner/expression.rs b/rust/lance-graph/src/datafusion_planner/expression.rs
index 21b28208..0468720e 100644
--- a/rust/lance-graph/src/datafusion_planner/expression.rs
+++ b/rust/lance-graph/src/datafusion_planner/expression.rs
@@ -56,6 +56,8 @@ pub(crate) fn to_df_boolean_expr(expr: &BooleanExpression) -> Expr {
         BE::Exists(prop) => Expr::IsNotNull(Box::new(to_df_value_expr(
             &ValueExpression::Property(prop.clone()),
         ))),
+        BE::IsNull(expression) => Expr::IsNull(Box::new(to_df_value_expr(expression))),
+        BE::IsNotNull(expression) => Expr::IsNotNull(Box::new(to_df_value_expr(expression))),
         _ => lit(true),
     }
 }
diff --git a/rust/lance-graph/tests/test_datafusion_pipeline.rs b/rust/lance-graph/tests/test_datafusion_pipeline.rs
index 177afa66..2bfa868c 100644
--- a/rust/lance-graph/tests/test_datafusion_pipeline.rs
+++ b/rust/lance-graph/tests/test_datafusion_pipeline.rs
@@ -3234,3 +3234,142 @@ async fn test_datafusion_shared_variable_distinct() {
         "DISTINCT should eliminate duplicates"
     );
 }
+
+#[tokio::test]
+async fn test_datafusion_is_null_node_property() {
+    let config = create_graph_config();
+    let person_batch = create_person_dataset();
+
+    let query = CypherQuery::new("MATCH (p:Person) WHERE p.city IS NULL RETURN p.name")
+        .unwrap()
+        .with_config(config);
+
+    let mut datasets = HashMap::new();
+    datasets.insert("Person".to_string(), person_batch);
+
+    let result = query.execute_datafusion(datasets).await.unwrap();
+
+    assert_eq!(result.num_rows(), 1);
+    assert_eq!(result.num_columns(), 1);
+
+    let names = result
+        .column(0)
+        .as_any()
+        .downcast_ref::<StringArray>()
+        .unwrap();
+    assert_eq!(names.value(0), "David");
+}
+
+#[tokio::test]
+async fn test_datafusion_is_not_null_node_property() {
+    let config = create_graph_config();
+    let person_batch = create_person_dataset();
+
+    let query = CypherQuery::new("MATCH (p:Person) WHERE p.city IS NOT NULL RETURN p.name")
+        .unwrap()
+        .with_config(config);
+
+    let mut datasets = HashMap::new();
+    datasets.insert("Person".to_string(), person_batch);
+
+    let result = query.execute_datafusion(datasets).await.unwrap();
+
+    assert_eq!(result.num_rows(), 4);
+    assert_eq!(result.num_columns(), 1);
+
+    let names = result
+        .column(0)
+        .as_any()
+        .downcast_ref::<StringArray>()
+        .unwrap();
+
+    let name_set: std::collections::HashSet<String> = (0..result.num_rows())
+        .map(|i| names.value(i).to_string())
+        .collect();
+    let expected: std::collections::HashSet<String> = ["Alice", "Bob", "Charlie", "Eve"]
+        .iter()
+        .map(|s| s.to_string())
+        .collect();
+    assert_eq!(name_set, expected);
+}
+
+#[tokio::test]
+async fn test_datafusion_is_null_relationship_property() {
+    let config = create_graph_config();
+    let person_batch = create_person_dataset();
+    let knows_batch = create_knows_dataset();
+
+    let query = CypherQuery::new(
+        "MATCH (a:Person)-[r:KNOWS]->(b:Person) \
+         WHERE r.since_year IS NULL \
+         RETURN a.name, b.name",
+    )
+    .unwrap()
+    .with_config(config);
+
+    let mut datasets = HashMap::new();
+    datasets.insert("Person".to_string(), person_batch);
+    datasets.insert("KNOWS".to_string(), knows_batch);
+
+    let result = query.execute_datafusion(datasets).await.unwrap();
+
+    assert_eq!(result.num_rows(), 1);
+    assert_eq!(result.num_columns(), 2);
+
+    let a_names = result
+        .column(0)
+        .as_any()
+        .downcast_ref::<StringArray>()
+        .unwrap();
+    let b_names = result
+        .column(1)
+        .as_any()
+        .downcast_ref::<StringArray>()
+        .unwrap();
+
+    assert_eq!(a_names.value(0), "David");
+    assert_eq!(b_names.value(0), "Eve");
+}
+
+#[tokio::test]
+async fn test_datafusion_is_not_null_relationship_property() {
+    let config = create_graph_config();
+    let person_batch = create_person_dataset();
+    let knows_batch = create_knows_dataset();
+
+    let query = CypherQuery::new(
+        "MATCH (a:Person)-[r:KNOWS]->(b:Person) \
+         WHERE r.since_year IS NOT NULL \
+         RETURN a.name, b.name",
+    )
+    .unwrap()
+    .with_config(config);
+
+    let mut datasets = HashMap::new();
+    datasets.insert("Person".to_string(), person_batch);
+    datasets.insert("KNOWS".to_string(), knows_batch);
+
+    let result = query.execute_datafusion(datasets).await.unwrap();
+
+    assert_eq!(out.num_rows(), 4);
+
+    let a_names = result
+        .column(0)
+        .as_any()
+        .downcast_ref::<StringArray>()
+        .unwrap();
+    let b_names = result
+        .column(1)
+        .as_any()
+        .downcast_ref::<StringArray>()
+        .unwrap();
+
+    for i in 0..result.num_rows() {
+        let a = a_names.value(i);
+        let b = b_names.value(i);
+        assert!(
+            !(a == "David" && b == "Eve"),
+            "David -> Eve should be filtered out by IS NOT NULL"
+        );
+    }
+}

From ff65c33b23156732d62969b4327a3091c0b80f71 Mon Sep 17 00:00:00 2001
From: leiyuou
Date: Wed, 19 Nov 2025 20:24:50 -0800
Subject: [PATCH 2/2] fixup

---
 rust/lance-graph/tests/test_datafusion_pipeline.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rust/lance-graph/tests/test_datafusion_pipeline.rs b/rust/lance-graph/tests/test_datafusion_pipeline.rs
index 2bfa868c..9de2cbc3 100644
--- a/rust/lance-graph/tests/test_datafusion_pipeline.rs
+++ b/rust/lance-graph/tests/test_datafusion_pipeline.rs
@@ -3351,7 +3351,7 @@ async fn test_datafusion_is_not_null_relationship_property() {
 
     let result = query.execute_datafusion(datasets).await.unwrap();
 
-    assert_eq!(out.num_rows(), 4);
+    assert_eq!(result.num_rows(), 4);
 
     let a_names = result
         .column(0)