From 77b9ff0454fe308d6b742cc3d073e98e3e628687 Mon Sep 17 00:00:00 2001 From: beinan Date: Wed, 15 Oct 2025 07:47:29 +0000 Subject: [PATCH] feat(graph): support variable-length paths (1..N) with capped unrolling; --- python/DEVELOPMENT.md | 30 +++++++++----- python/python/tests/test_graph.py | 1 - rust/lance-graph/src/lib.rs | 5 ++- rust/lance-graph/src/query.rs | 68 +++++++++++++++++++++++++++++++ 4 files changed, 92 insertions(+), 12 deletions(-) diff --git a/python/DEVELOPMENT.md b/python/DEVELOPMENT.md index 9effb0fb..840a9524 100644 --- a/python/DEVELOPMENT.md +++ b/python/DEVELOPMENT.md @@ -2,30 +2,40 @@ ## Building the project -This project is built with [maturin](https://github.com/PyO3/maturin). +This project is built with [maturin](https://github.com/PyO3/maturin) and uses +[uv](https://docs.astral.sh/uv/) to manage a local virtual environment. -It can be built in development mode with: +Recommended uv workflow: ```shell +cd python +uv venv --python 3.11 .venv +source .venv/bin/activate +uv pip install 'maturin[patchelf]' +uv pip install -e '.[tests]' maturin develop ``` -This builds the Rust native module in place. You will need to re-run this -whenever you change the Rust code. But changing the Python code doesn't require -re-building. +Notes: +- If another virtual environment is active, run `deactivate` first so uv binds to `.venv`. +- After changing Rust code, re-run `maturin develop`. Pure-Python changes do not require rebuilds. ## Running tests -To run the tests, first install the test packages: +You can run tests either via the Makefile (uses uv under the hood) or directly with uv. 
+ +Using Makefile (recommended): ```shell -pip install '.[tests]' +cd python +make test ``` -then: +Directly with uv: ```shell -make test +cd python +uv run pytest -v python/tests ``` To check the documentation examples, use @@ -72,7 +82,7 @@ From now any, any attempt to commit, will first run the linters against the modified files: ```shell -$ git commit -m"Changed some python files" +$ git commit -m "Changed some python files" black....................................................................Passed isort (python)...........................................................Passed ruff.....................................................................Passed diff --git a/python/python/tests/test_graph.py b/python/python/tests/test_graph.py index 9635b5b1..fda44b10 100644 --- a/python/python/tests/test_graph.py +++ b/python/python/tests/test_graph.py @@ -158,7 +158,6 @@ def test_two_hop_friends_of_friends(graph_env): assert set(data["c_id"]) == {4} -@pytest.mark.xfail(reason="Variable-length path (*1..2) support pending in executor") def test_variable_length_path(graph_env): config, datasets, _ = graph_env query = CypherQuery( diff --git a/rust/lance-graph/src/lib.rs b/rust/lance-graph/src/lib.rs index 5304d9b7..1001f11c 100644 --- a/rust/lance-graph/src/lib.rs +++ b/rust/lance-graph/src/lib.rs @@ -11,7 +11,7 @@ //! //! - Cypher query parsing and AST representation //! - Graph pattern matching on columnar data -//! - Property graph interpretation of Lance datasets +//! - Property graph interpretation of Lance datasets //! - Translation to optimized SQL via DataFusion //! - Support for nodes, relationships, and properties //! 
@@ -47,6 +47,9 @@ pub mod query_processor; pub mod semantic; pub mod source_catalog; +/// Maximum allowed hops for variable-length relationship expansion (e.g., *1..N) +pub const MAX_VARIABLE_LENGTH_HOPS: u32 = 20; + pub use config::{GraphConfig, NodeMapping, RelationshipMapping}; pub use error::{GraphError, Result}; pub use query::CypherQuery; diff --git a/rust/lance-graph/src/query.rs b/rust/lance-graph/src/query.rs index a78a365a..c1ed99ce 100644 --- a/rust/lance-graph/src/query.rs +++ b/rust/lance-graph/src/query.rs @@ -759,6 +759,74 @@ impl CypherQuery { message: "Graph configuration is required for execution".to_string(), location: snafu::Location::new(file!(), line!(), column!()), })?; + + // Handle single-segment variable-length paths by unrolling ranges (*1..N, capped) + if path.segments.len() == 1 { + if let Some(length_range) = &path.segments[0].relationship.length { + let cap: u32 = crate::MAX_VARIABLE_LENGTH_HOPS; + let min_len = length_range.min.unwrap_or(1).max(1); + let max_len = length_range.max.unwrap_or(cap); + + if min_len > max_len { + return Err(GraphError::InvalidPattern { + message: format!( + "Invalid variable-length range: min {:?} greater than max {:?}", + length_range.min, length_range.max + ), + location: snafu::Location::new(file!(), line!(), column!()), + }); + } + + if max_len > cap { + return Err(GraphError::UnsupportedFeature { + feature: format!( + "Variable-length paths with length > {} are not supported (got {:?}..{:?})", + cap, length_range.min, length_range.max + ), + location: snafu::Location::new(file!(), line!(), column!()), + }); + } + + use datafusion::dataframe::DataFrame; + let mut union_df: Option<DataFrame> = None; + + for hops in min_len..=max_len { + // Build a fixed-length synthetic path by repeating the single segment + let mut synthetic = crate::ast::PathPattern { + start_node: path.start_node.clone(), + segments: Vec::with_capacity(hops as usize), + }; + + for i in 0..hops { + let mut seg = path.segments[0].clone(); + // 
Drop variables to avoid alias collisions on repeated hops + seg.relationship.variable = None; + if (i + 1) < hops { + seg.end_node.variable = None; // intermediate hop + } + // Clear length spec for this fixed hop + seg.relationship.length = None; + synthetic.segments.push(seg); + } + + let exec = PathExecutor::new(ctx, cfg, &synthetic)?; + let mut df = exec.build_chain().await?; + df = exec.apply_where(df, &self.ast)?; + df = exec.apply_return(df, &self.ast)?; + + union_df = Some(match union_df { + Some(acc) => acc.union(df).map_err(|e| GraphError::PlanError { + message: format!("Failed to UNION variable-length paths: {}", e), + location: snafu::Location::new(file!(), line!(), column!()), + })?, + None => df, + }); + } + + return Ok(union_df); + } + } + let exec = PathExecutor::new(ctx, cfg, path)?; let df = exec.build_chain().await?; let df = exec.apply_where(df, &self.ast)?;