From 76b9c2d618ca6e073c91d0131a7f6a4ae3aec9b7 Mon Sep 17 00:00:00 2001
From: "jianjian.xie" <jja725@gmail.com>
Date: Mon, 22 Jun 2026 13:45:26 -0700
Subject: [PATCH 01/10] docs: add Phase 2 CSR native expand operator design

Design spec for issue #159 Phase 2: wire the Phase 1 CsrIndex into a
native single-hop Expand via custom DataFusion ExecutionPlan
(CsrExpandExec topology + LanceTakeExec materialization), dense-ROWID
id model, with fallback to the DataFusion join path for unsupported
shapes.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 ...06-22-csr-native-expand-operator-design.md | 179 ++++++++++++++++++
 1 file changed, 179 insertions(+)
 create mode 100644 docs/superpowers/specs/2026-06-22-csr-native-expand-operator-design.md

diff --git a/docs/superpowers/specs/2026-06-22-csr-native-expand-operator-design.md b/docs/superpowers/specs/2026-06-22-csr-native-expand-operator-design.md
new file mode 100644
index 00000000..d4bc2678
--- /dev/null
+++ b/docs/superpowers/specs/2026-06-22-csr-native-expand-operator-design.md
@@ -0,0 +1,179 @@
+# Phase 2: CSR-backed native single-hop Expand operator
+
+Issue: [lance-format/lance-graph#159](https://github.com/lance-format/lance-graph/issues/159)
+Status: Approved design (2026-06-22)
+Builds on: Phase 1 — `CsrIndex` (PR #160, commit `c7e4f18`)
+
+## Problem
+
+Today every Cypher traversal is lowered to DataFusion SQL joins. A single-hop
+`(a)-[:KNOWS]->(b)` becomes a relationship scan plus two inner joins. Phase 1
+added an in-memory `CsrIndex` (dense `u64` adjacency with `neighbors()`, `bfs()`,
+`shortest_path()`, Arrow (de)serialization) but nothing consumes it —
+`LanceNativePlanner` is a placeholder that returns `EmptyRelation`, and
+`ExecutionStrategy::LanceNative` errors with "not yet implemented".
+
+Phase 2 wires CSR into a real native execution path for **single-hop `Expand`**,
+replacing the join with a direct neighbor lookup followed by a `take()`.
+
+## Foundational decisions (locked)
+
+1. **Execution model — custom DataFusion `ExecutionPlan` (DuckPGQ-style).**
+   A logical extension node lowers to a streaming physical operator that does
+   neighbor lookups at execution time and composes with the rest of the
+   DataFusion pipeline. (Alternatives considered: DataFusion-materialized
+   MemTable; pure-native bypassing DataFusion. Rejected — less faithful / less
+   composable.)
+
+2. **ID mapping — dense ROWID model.** The CSR vertex id *is* the node's row id.
+   `csr.neighbors(src_rowid) -> dst_rowid`s; target properties come from
+   `take(dst_rowids)`. This mirrors how every Lance index works (key → row ids →
+   `take()` to materialize) and reuses Lance's addressing instead of inventing a
+   dictionary. For Phase 2 (in-memory, single fragment) "row id == dense row
+   offset"; this generalizes to **Lance stable row ids** in Phase 4.
+
+3. **Output contract — properties materialized via `take()`.** `Expand` does not
+   fall back when target properties are projected. The neighbor row ids feed a
+   `take()` that materializes the target node's columns, so `RETURN b.name`
+   runs fully native.
+
+4. **`take()` placement — a separate `LanceTakeExec` operator.**
+   `CsrExpandExec` does *topology only* (row id → neighbor row ids);
+   `LanceTakeExec` does *materialization only* via a `RowMaterializer`
+   abstraction. Single-purpose operators, reused by Phase 3 (multi-hop) and
+   Phase 5 (hybrid vector). Mirrors Lance's own scan+take shape.
+
+## Architecture & modules
+
+Promote `crates/lance-graph/src/lance_native_planner.rs` to a module directory:
+
+| File | Responsibility |
+|---|---|
+| `lance_native_planner/mod.rs` | `LanceNativePlanner` — native-vs-fallback decision; lowering that overrides only `Expand` |
+| `lance_native_planner/csr_expand.rs` | `CsrExpandNode` (logical extension) + `CsrExpandExec` (physical): row id → neighbor row ids |
+| `lance_native_planner/take.rs` | `LanceTakeNode` + `LanceTakeExec` + `RowMaterializer` trait + `InMemoryMaterializer` |
+| `lance_native_planner/extension_planner.rs` | `ExtensionPlanner` mapping logical nodes → physical execs (builds CSR + materializer at plan time); `CsrQueryPlanner` wrapping `DefaultPhysicalPlanner` |
+
+## Data flow
+
+For `MATCH (a:Person)-[:KNOWS]->(b:Person) WHERE b.age > 30 RETURN a.name, b.name`:
+
+```
+TableScan(person AS a)        a__id(=rowid), a__name, a__age        [reuse existing scan_ops]
+ └─ CsrExpandExec             + b__id  (one row per neighbor; a__* carried through, b__id = neighbor row id)
+     └─ LanceTakeExec         + b__name, b__age  (take all other target cols by offset)
+         └─ Filter(b__age>30) [DataFusion native physical op]
+             └─ Project(...)  [DataFusion native physical op]
+```
+
+`CsrExpandExec` and `LanceTakeExec` produce a normal `RecordBatch` stream with a
+correct qualified schema (`{var}__{col}`), so all downstream operators (Filter,
+Project, Sort, Limit, Distinct, Offset) are ordinary DataFusion physical
+operators.
+
+## The planner: reuse + override
+
+`LanceNativePlanner` holds a `DataFusionPlanner` and **overrides only `Expand`
+lowering**; every other `LogicalOperator` delegates to the existing crate-internal
+builders (`pub(crate)`), so scans/projects/filters/limits behave identically to
+the DataFusion path.
+
+- **`Expand` →** `CsrExpandNode` (appends `b__<target_id_field>` — the neighbor
+  row id, which under the dense model equals the target's id value) wrapped by
+  `LanceTakeNode` (appends **all remaining target node columns**, qualified
+  `b__<col>`).
+- **Materialize all target columns, not just referenced ones.** This is exactly
+  what the DataFusion target scan does (`build_qualified_target_scan` projects
+  every target field), so the native output schema matches the join path's column
+  set and every `b__col` is available to downstream Project/Filter/Sort. It also
+  removes any need to walk expressions collecting `b.*` references. Projection
+  pushdown (materializing only needed columns) is a later optimization.
+- Source row-id column = `a__<source_id_field>` (dense-ROWID assumption; becomes
+  `a___rowid` in Phase 4).
+- The neighbor column emitted by `CsrExpandExec` is named `b__<target_id_field>`
+  (e.g. `b__id`), so `RETURN b.id` is served directly and `LanceTakeExec` reuses
+  that column as the row-id input. `LanceTakeExec` therefore materializes the
+  target columns *other than* the id field.
+- Output column naming stays `{var}__{col}` to match the DataFusion path exactly.
+
+### Native-vs-fallback rule
+
+A query is served natively **iff** its plan contains exactly one single-hop
+`Expand` (single relationship type, direction Outgoing or Incoming) and no
+`VariableLengthExpand`, `Join`, or `Unwind`. **Otherwise the entire query
+delegates to `DataFusionPlanner`.** Falls back for: `VariableLengthExpand`,
+multi-hop (more than one `Expand`), an `Expand` with more than one relationship type, `Undirected` direction, `Join`, `Unwind`.
+
+Consequence: `ExecutionStrategy::LanceNative` is always correct on valid Cypher —
+it never errors, it just uses joins when CSR cannot serve the shape.
+
+## CSR construction & RowMaterializer
+
+- `ExtensionPlanner::plan_extension` (async) looks up the edge table and target
+  node table from `SessionState`, collects them, and builds:
+  - `Arc<CsrIndex>` via the Phase 1 builder. **Add
+    `CsrIndexBuilder::add_edges_from_batch_with_columns(batch, src_col, dst_col)`**
+    so it uses the real `RelationshipMapping` field names
+    (`source_id_field`/`target_id_field`) instead of the hardcoded
+    `src_id`/`dst_id`. Outgoing builds `(src→dst)`; Incoming reverses to `(dst→src)`.
+  - `InMemoryMaterializer { batch }` over the collected target node table.
+- `RowMaterializer` trait:
+  ```rust
+  trait RowMaterializer: Send + Sync {
+      fn schema(&self) -> SchemaRef;
+      fn take(&self, row_ids: &UInt64Array, columns: &[String]) -> Result<RecordBatch>;
+  }
+  ```
+  In-memory impl = `arrow::compute::take` by offset — O(1) random access, the
+  concrete reason CSR beats the hash join. The take node materializes **all**
+  target columns except the id field. Phase 4 adds a `LanceDatasetMaterializer`
+  backed by `LanceDataset::take`.
+- CSR is built once per physical planning (rebuild per query is acceptable for
+  Phase 2; Phase 4 persists CSR as a Lance dataset).
+- `num_vertices` is inferred from the edge data (Phase 1 default = max id + 1);
+  source ids beyond range yield empty neighbors (no error), matching Phase 1.
+
+## query.rs wiring
+
+- Build the native `SessionContext` via
+  `SessionStateBuilder::new().with_default_features().with_query_planner(Arc::new(CsrQueryPlanner)).build()`.
+  `CsrQueryPlanner` wraps `DefaultPhysicalPlanner` with the `ExtensionPlanner`.
+- `create_logical_plans` gains a planner choice (DataFusion vs LanceNative).
+- The **in-memory `execute(datasets, Some(LanceNative))` path is fully wired and
+  tested.** It builds the native context, plans with `LanceNativePlanner`, and
+  executes.
+- The **namespace native path stays `UnsupportedFeature` for Phase 2** — it needs
+  the Lance-dataset materializer, which is Phase 4 (persistence).
+
+## Error handling
+
+Reuse `GraphError` with `snafu::Location`. Missing or wrong-typed edge columns at
+CSR-build time and take failures surface as `PlanError` / `ExecutionError`. Source
+row ids beyond CSR range produce empty neighbors rather than an error.
+
+## Testing
+
+- **Operator unit tests**
+  - `CsrExpandExec`: outgoing, incoming, vertex with no neighbors, source id out
+    of range, input split across multiple batches, carry-through of source columns.
+  - `LanceTakeExec`: take correctness by offset, column subset selection, empty
+    input, row-id column dropped from output.
+  - `CsrIndexBuilder::add_edges_from_batch_with_columns`: custom column names,
+    reversed (incoming) build.
+- **Planner tests**
+  - Supported shape lowers to a plan containing `CsrExpandExec` + `LanceTakeExec`.
+  - Each unsupported shape (var-length, two-hop, undirected, multi-type) falls
+    back to a join plan (`Join` present, no extension nodes).
+- **End-to-end parity** (`execute(datasets, …)` LanceNative vs DataFusion return
+  identical results)
+  - `MATCH (a:Person)-[:KNOWS]->(b:Person) RETURN a.name, b.name`
+  - same with `WHERE b.age > 30`
+  - same with `LIMIT`
+  - incoming direction `(a)<-[:KNOWS]-(b)`
+
+## Out of scope (later phases)
+
+- Multi-hop / `VariableLengthExpand`, BFS/DFS/shortest-path operators (Phase 3).
+- Persisting CSR as Lance datasets, incremental updates, Lance stable row ids,
+  `LanceDatasetMaterializer`, namespace native path (Phase 4).
+- Hybrid CSR + vector search (Phase 5).

From 87ee0d302ff833af3025470b7d13f7cdd06e82e7 Mon Sep 17 00:00:00 2001
From: "jianjian.xie" <jja725@gmail.com>
Date: Mon, 22 Jun 2026 14:38:59 -0700
Subject: [PATCH 02/10] docs: add Phase 2 CSR native expand implementation plan

7-task TDD plan implementing the approved design: generalize CSR builder,
CsrExpandNode/Exec, LanceTakeNode/Exec + RowMaterializer, CsrExtensionPlanner/
CsrQueryPlanner, LanceNativePlanner lowering with fallback, and query.rs wiring
of the LanceNative execution strategy with end-to-end parity tests.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../2026-06-22-csr-native-expand-operator.md  | 2112 +++++++++++++++++
 1 file changed, 2112 insertions(+)
 create mode 100644 docs/superpowers/plans/2026-06-22-csr-native-expand-operator.md

diff --git a/docs/superpowers/plans/2026-06-22-csr-native-expand-operator.md b/docs/superpowers/plans/2026-06-22-csr-native-expand-operator.md
new file mode 100644
index 00000000..0b95e068
--- /dev/null
+++ b/docs/superpowers/plans/2026-06-22-csr-native-expand-operator.md
@@ -0,0 +1,2112 @@
+# Phase 2: CSR-backed Native Single-Hop Expand — Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Execute single-hop Cypher `Expand` natively via the Phase 1 `CsrIndex` — a custom DataFusion `ExecutionPlan` does neighbor lookups (`CsrExpandExec`) and a `take()` materializes target columns (`LanceTakeExec`), instead of relationship-scan + two joins.
+
+**Architecture:** `LanceNativePlanner` wraps `DataFusionPlanner` and overrides only `Expand` lowering, emitting two logical extension nodes (`CsrExpandNode`, `LanceTakeNode`) over the existing source scan; anything it can't serve delegates to the join path. A `CsrQueryPlanner` registered on the `SessionContext` turns those nodes into physical operators, building the CSR and an in-memory row materializer at physical-planning time. Dense-ROWID id model: CSR vertex id == node row id; target props via `take()`.
+
+**Tech Stack:** Rust, DataFusion 50.3 (`UserDefinedLogicalNodeCore`, `ExtensionPlanner`, `QueryPlanner`), Arrow 56.2 (`arrow::compute::{take, cast}`), async-trait 0.1, tokio (tests).
+
+**Spec:** `docs/superpowers/specs/2026-06-22-csr-native-expand-operator-design.md`
+
+---
+
+## File Structure
+
+| File | Action | Responsibility |
+|---|---|---|
+| `crates/lance-graph/src/csr_index.rs` | Modify | Add `add_edges_from_batch_with_columns` |
+| `crates/lance-graph/src/datafusion_planner/mod.rs` | Modify | `mod expression;` → `pub(crate) mod expression;` |
+| `crates/lance-graph/src/lance_native_planner.rs` | Delete | Replaced by directory module |
+| `crates/lance-graph/src/lance_native_planner/mod.rs` | Create | `LanceNativePlanner`: native-vs-fallback + lowering; re-exports |
+| `crates/lance-graph/src/lance_native_planner/direction.rs` | Create | `NativeDirection` enum |
+| `crates/lance-graph/src/lance_native_planner/take.rs` | Create | `RowMaterializer`, `InMemoryMaterializer`, `take_batch`, `LanceTakeNode`, `LanceTakeExec` |
+| `crates/lance-graph/src/lance_native_planner/csr_expand.rs` | Create | `expand_batch`, `CsrExpandNode`, `CsrExpandExec` |
+| `crates/lance-graph/src/lance_native_planner/extension_planner.rs` | Create | `CsrExtensionPlanner`, `CsrQueryPlanner` |
+| `crates/lance-graph/src/query.rs` | Modify | Wire `ExecutionStrategy::LanceNative` (in-memory datasets path) |
+| `crates/lance-graph/tests/test_lance_native_expand.rs` | Create | End-to-end parity tests |
+
+Run all crate tests with: `cargo test -p lance-graph`
+Run a single test with: `cargo test -p lance-graph <test_name> -- --exact` (or `cargo test -p lance-graph --test test_lance_native_expand <name>` for integration tests).
+
+---
+
+## Task 1: Generalize CSR builder with custom edge column names
+
+**Files:**
+- Modify: `crates/lance-graph/src/csr_index.rs:240-274` (the `add_edges_from_batch` method) and its test module.
+
+- [ ] **Step 1: Write the failing test**
+
+Add to the `tests` module in `crates/lance-graph/src/csr_index.rs` (before the closing `}`):
+
+```rust
+    #[test]
+    fn test_build_from_record_batch_custom_columns() {
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("src_person_id", DataType::UInt64, false),
+            Field::new("dst_person_id", DataType::UInt64, false),
+        ]));
+        let batch = RecordBatch::try_new(
+            schema,
+            vec![
+                Arc::new(UInt64Array::from(vec![0, 0, 1])),
+                Arc::new(UInt64Array::from(vec![1, 2, 2])),
+            ],
+        )
+        .unwrap();
+
+        // Forward (outgoing): src -> dst
+        let idx = CsrIndexBuilder::new()
+            .add_edges_from_batch_with_columns(&batch, "src_person_id", "dst_person_id")
+            .unwrap()
+            .build();
+        assert_eq!(idx.neighbors(0), &[1, 2]);
+        assert_eq!(idx.neighbors(1), &[2]);
+
+        // Reversed (incoming): swap the column args -> dst -> src
+        let rev = CsrIndexBuilder::new()
+            .add_edges_from_batch_with_columns(&batch, "dst_person_id", "src_person_id")
+            .unwrap()
+            .build();
+        assert_eq!(rev.neighbors(2), &[0, 1]);
+        assert_eq!(rev.neighbors(1), &[0]);
+    }
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cargo test -p lance-graph test_build_from_record_batch_custom_columns`
+Expected: FAIL — `no method named add_edges_from_batch_with_columns`.
+
+- [ ] **Step 3: Implement**
+
+In `crates/lance-graph/src/csr_index.rs`, replace the existing `add_edges_from_batch` method (lines ~239-274) with a thin wrapper plus the generalized method:
+
+```rust
+    /// Add edges from an Arrow RecordBatch with `src_id` and `dst_id` columns.
+    pub fn add_edges_from_batch(self, batch: &RecordBatch) -> Result<Self> {
+        self.add_edges_from_batch_with_columns(batch, "src_id", "dst_id")
+    }
+
+    /// Add edges from an Arrow RecordBatch, reading source vertex ids from
+    /// `src_col` and destination vertex ids from `dst_col`.
+    ///
+    /// Both columns must be `UInt64`. To build a reversed (incoming/CSC) index,
+    /// pass the destination column name as `src_col` and vice versa.
+    pub fn add_edges_from_batch_with_columns(
+        mut self,
+        batch: &RecordBatch,
+        src_col: &str,
+        dst_col: &str,
+    ) -> Result<Self> {
+        let src_array = batch
+            .column_by_name(src_col)
+            .ok_or_else(|| GraphError::PlanError {
+                message: format!("Edge batch missing '{}' column", src_col),
+                location: snafu::Location::new(file!(), line!(), column!()),
+            })?
+            .as_any()
+            .downcast_ref::<UInt64Array>()
+            .ok_or_else(|| GraphError::PlanError {
+                message: format!("'{}' column must be UInt64", src_col),
+                location: snafu::Location::new(file!(), line!(), column!()),
+            })?;
+        let dst_array = batch
+            .column_by_name(dst_col)
+            .ok_or_else(|| GraphError::PlanError {
+                message: format!("Edge batch missing '{}' column", dst_col),
+                location: snafu::Location::new(file!(), line!(), column!()),
+            })?
+            .as_any()
+            .downcast_ref::<UInt64Array>()
+            .ok_or_else(|| GraphError::PlanError {
+                message: format!("'{}' column must be UInt64", dst_col),
+                location: snafu::Location::new(file!(), line!(), column!()),
+            })?;
+
+        for i in 0..batch.num_rows() {
+            self.edges.push((src_array.value(i), dst_array.value(i)));
+        }
+
+        Ok(self)
+    }
+```
+
+- [ ] **Step 4: Run tests to verify they pass**
+
+Run: `cargo test -p lance-graph csr_index`
+Expected: PASS (including the existing `test_build_from_record_batch`, which now routes through the wrapper).
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add crates/lance-graph/src/csr_index.rs
+git commit -m "feat(csr): add_edges_from_batch_with_columns for custom edge column names"
+```
+
+---
+
+## Task 2: Module skeleton + `NativeDirection`
+
+Converts the placeholder file into a directory module so later tasks have a home. Keeps the existing `LanceNativePlanner` placeholder behavior compiling (it is rewritten in Task 6).
+
+**Files:**
+- Delete: `crates/lance-graph/src/lance_native_planner.rs`
+- Create: `crates/lance-graph/src/lance_native_planner/mod.rs`
+- Create: `crates/lance-graph/src/lance_native_planner/direction.rs`
+
+- [ ] **Step 1: Create the direction enum**
+
+Create `crates/lance-graph/src/lance_native_planner/direction.rs`:
+
+```rust
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The Lance Authors
+
+//! Traversal direction for the native CSR expand operators.
+
+/// Direction a single-hop expand traverses. `Undirected` is intentionally
+/// absent — undirected expands fall back to the DataFusion join planner.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum NativeDirection {
+    /// Follow edges source -> destination (CSR).
+    Outgoing,
+    /// Follow edges destination -> source (CSC / reversed).
+    Incoming,
+}
+```
+
+- [ ] **Step 2: Move the placeholder into the new mod.rs**
+
+Delete `crates/lance-graph/src/lance_native_planner.rs` and create
+`crates/lance-graph/src/lance_native_planner/mod.rs` with the placeholder's
+previous behavior, an updated module doc comment, and the `mod direction;`
+declaration plus its `pub use` re-export added at the top. The full file:
+
+```rust
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The Lance Authors
+
+//! Lance Native physical planner (placeholder)
+//!
+//! Rewritten in Task 6 to lower single-hop `Expand` onto CSR-backed
+//! extension nodes. For now it keeps the original placeholder behavior so the
+//! crate compiles between tasks.
+
+mod direction;
+
+pub use direction::NativeDirection;
+
+use crate::config::GraphConfig;
+use crate::datafusion_planner::GraphPhysicalPlanner;
+use crate::error::Result;
+use crate::logical_plan::LogicalOperator;
+use datafusion::common::DFSchema;
+use datafusion::logical_expr::{EmptyRelation, LogicalPlan};
+use std::sync::Arc;
+
+/// Placeholder Lance-native planner
+pub struct LanceNativePlanner {
+    #[allow(dead_code)]
+    config: GraphConfig,
+}
+
+impl LanceNativePlanner {
+    pub fn new(config: GraphConfig) -> Self {
+        Self { config }
+    }
+}
+
+impl GraphPhysicalPlanner for LanceNativePlanner {
+    fn plan(&self, _logical_plan: &LogicalOperator) -> Result<LogicalPlan> {
+        let schema = Arc::new(DFSchema::empty());
+        Ok(LogicalPlan::EmptyRelation(EmptyRelation {
+            produce_one_row: false,
+            schema,
+        }))
+    }
+}
+```
+
+(The placeholder's `#[cfg(test)] mod tests { ... }` is dropped; Task 6 adds real tests.)
+
+- [ ] **Step 3: Build to verify the module restructure compiles**
+
+Run: `cargo build -p lance-graph`
+Expected: builds (a warning about unused `NativeDirection` re-export is acceptable; it is consumed in Task 3).
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add -A crates/lance-graph/src/lance_native_planner.rs crates/lance-graph/src/lance_native_planner/
+git commit -m "refactor(native): promote lance_native_planner to module dir; add NativeDirection"
+```
+
+---
+
+## Task 3: `expand_batch` + `CsrExpandNode` + `CsrExpandExec`
+
+**Files:**
+- Create: `crates/lance-graph/src/lance_native_planner/csr_expand.rs`
+- Modify: `crates/lance-graph/src/lance_native_planner/mod.rs` (add `mod csr_expand;`)
+
+- [ ] **Step 1: Write the pure expansion function with its tests**
+
+Create `crates/lance-graph/src/lance_native_planner/csr_expand.rs` with `expand_batch` and its test module (the logical node and physical operator are added in Step 3):
+
+```rust
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The Lance Authors
+
+//! Native single-hop expand: logical node + physical operator + core function.
+//!
+//! `CsrExpandExec` does topology only — for each input row it looks up the
+//! source vertex's neighbors in the CSR index and emits one output row per
+//! neighbor, carrying through all input columns and appending the neighbor row
+//! id as a new column. Target property materialization is handled separately by
+//! `LanceTakeExec`.
+
+use std::any::Any;
+use std::fmt;
+use std::sync::Arc;
+
+use arrow::compute::{cast, take};
+use arrow_array::{Array, ArrayRef, RecordBatch, UInt32Array, UInt64Array};
+use arrow_schema::{DataType, Field, Schema, SchemaRef};
+use datafusion::common::{DFSchemaRef, Result as DFResult};
+use datafusion::execution::TaskContext;
+use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
+use datafusion::physical_expr::EquivalenceProperties;
+use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType};
+use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
+use datafusion::physical_plan::{
+    DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties,
+    SendableRecordBatchStream,
+};
+use futures::StreamExt;
+
+use crate::csr_index::CsrIndex;
+use crate::error::{GraphError, Result};
+use super::direction::NativeDirection;
+
+/// Expand one input batch: for every input row, append one output row per
+/// neighbor of that row's source vertex.
+///
+/// `source_id_idx` is the column index of the source vertex id within `input`.
+/// `neighbor_field` is the appended column (its data type is the target id
+/// field's type; neighbor ids are cast into it). `out_schema` must equal
+/// `input.schema()` fields followed by `neighbor_field`.
+pub(crate) fn expand_batch(
+    input: &RecordBatch,
+    source_id_idx: usize,
+    csr: &CsrIndex,
+    neighbor_field: &Field,
+    out_schema: &SchemaRef,
+) -> Result<RecordBatch> {
+    let map_err = |e: arrow_schema::ArrowError, what: &str| GraphError::ExecutionError {
+        message: format!("CsrExpand {}: {}", what, e),
+        location: snafu::Location::new(file!(), line!(), column!()),
+    };
+
+    // Source ids may be any integer type; normalize to u64.
+    let src_u64 = cast(input.column(source_id_idx), &DataType::UInt64)
+        .map_err(|e| map_err(e, "cast source id to u64"))?;
+    let src = src_u64
+        .as_any()
+        .downcast_ref::<UInt64Array>()
+        .expect("cast to UInt64 yields UInt64Array");
+
+    let mut parent_idx: Vec<u32> = Vec::new();
+    let mut neighbors: Vec<u64> = Vec::new();
+    for row in 0..input.num_rows() {
+        if src.is_null(row) {
+            continue;
+        }
+        for &n in csr.neighbors(src.value(row)) {
+            parent_idx.push(row as u32);
+            neighbors.push(n);
+        }
+    }
+
+    let take_idx = UInt32Array::from(parent_idx);
+    let mut cols: Vec<ArrayRef> = Vec::with_capacity(input.num_columns() + 1);
+    for c in input.columns() {
+        cols.push(take(c, &take_idx, None).map_err(|e| map_err(e, "take carried column"))?);
+    }
+    let neigh_u64 = Arc::new(UInt64Array::from(neighbors)) as ArrayRef;
+    let neigh_col = cast(&neigh_u64, neighbor_field.data_type())
+        .map_err(|e| map_err(e, "cast neighbor id"))?;
+    cols.push(neigh_col);
+
+    RecordBatch::try_new(out_schema.clone(), cols).map_err(|e| GraphError::ExecutionError {
+        message: format!("CsrExpand build output batch: {}", e),
+        location: snafu::Location::new(file!(), line!(), column!()),
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::csr_index::CsrIndexBuilder;
+
+    fn input_batch() -> RecordBatch {
+        // a__id = [0,1,2,3], a__name = ["n0","n1","n2","n3"]
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("a__id", DataType::UInt64, false),
+            Field::new("a__name", DataType::Utf8, false),
+        ]));
+        RecordBatch::try_new(
+            schema,
+            vec![
+                Arc::new(UInt64Array::from(vec![0u64, 1, 2, 3])),
+                Arc::new(arrow_array::StringArray::from(vec!["n0", "n1", "n2", "n3"])),
+            ],
+        )
+        .unwrap()
+    }
+
+    fn out_schema() -> SchemaRef {
+        Arc::new(Schema::new(vec![
+            Field::new("a__id", DataType::UInt64, false),
+            Field::new("a__name", DataType::Utf8, false),
+            Field::new("b__id", DataType::UInt64, true),
+        ]))
+    }
+
+    #[test]
+    fn test_expand_batch_outgoing() {
+        // 0->1, 0->2, 1->2, 3-> (none)
+        let csr = CsrIndexBuilder::new()
+            .with_num_vertices(4)
+            .add_edge(0, 1)
+            .add_edge(0, 2)
+            .add_edge(1, 2)
+            .build();
+        let neighbor_field = Field::new("b__id", DataType::UInt64, true);
+        let out = expand_batch(&input_batch(), 0, &csr, &neighbor_field, &out_schema()).unwrap();
+
+        assert_eq!(out.num_rows(), 3);
+        let a_id = out.column(0).as_any().downcast_ref::<UInt64Array>().unwrap();
+        let b_id = out.column(2).as_any().downcast_ref::<UInt64Array>().unwrap();
+        let a_name = out
+            .column(1)
+            .as_any()
+            .downcast_ref::<arrow_array::StringArray>()
+            .unwrap();
+        // Rows: (0,n0,1) (0,n0,2) (1,n1,2)
+        assert_eq!(a_id.values(), &[0, 0, 1]);
+        assert_eq!(b_id.values(), &[1, 2, 2]);
+        assert_eq!(a_name.value(0), "n0");
+        assert_eq!(a_name.value(2), "n1");
+    }
+
+    #[test]
+    fn test_expand_batch_no_neighbors_and_out_of_range() {
+        let csr = CsrIndexBuilder::new().with_num_vertices(2).build(); // no edges
+        let neighbor_field = Field::new("b__id", DataType::UInt64, true);
+        let out = expand_batch(&input_batch(), 0, &csr, &neighbor_field, &out_schema()).unwrap();
+        assert_eq!(out.num_rows(), 0);
+    }
+
+    #[test]
+    fn test_expand_batch_casts_source_id_from_int64() {
+        // Source id column is Int64 (not UInt64): must still work.
+        let schema = Arc::new(Schema::new(vec![Field::new("a__id", DataType::Int64, false)]));
+        let input = RecordBatch::try_new(
+            schema,
+            vec![Arc::new(arrow_array::Int64Array::from(vec![0i64, 1]))],
+        )
+        .unwrap();
+        let out_schema = Arc::new(Schema::new(vec![
+            Field::new("a__id", DataType::Int64, false),
+            Field::new("b__id", DataType::UInt64, true),
+        ]));
+        let csr = CsrIndexBuilder::new()
+            .with_num_vertices(2)
+            .add_edge(0, 1)
+            .build();
+        let neighbor_field = Field::new("b__id", DataType::UInt64, true);
+        let out = expand_batch(&input, 0, &csr, &neighbor_field, &out_schema).unwrap();
+        assert_eq!(out.num_rows(), 1);
+        let b_id = out.column(1).as_any().downcast_ref::<UInt64Array>().unwrap();
+        assert_eq!(b_id.values(), &[1]);
+    }
+}
+```
+
+Add `mod csr_expand;` to `crates/lance-graph/src/lance_native_planner/mod.rs` (after `mod direction;`).
+
+- [ ] **Step 2: Run the tests for the pure function**
+
+Step 1 already includes the `expand_batch` body, so there is no failing run to
+observe here; this step verifies the pure function. The node/exec added in Step 3 add no new tests.
+
+Run: `cargo test -p lance-graph expand_batch`
+Expected: PASS (3 tests).
+
+- [ ] **Step 3: Add the logical node and physical operator**
+
+Append to `crates/lance-graph/src/lance_native_planner/csr_expand.rs` (after `expand_batch`, before `#[cfg(test)]`):
+
+```rust
+/// Logical extension node for a single-hop CSR expand.
+///
+/// Holds only hashable metadata; the physical operator (and its `CsrIndex`) is
+/// constructed by `CsrExtensionPlanner` at physical-planning time.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct CsrExpandNode {
+    /// Source subplan (a node scan, optionally with a source-only filter).
+    pub input: LogicalPlan,
+    /// Relationship type (lowercased table name to look up the edge table).
+    pub rel_type: String,
+    /// Edge table column holding source vertex ids.
+    pub src_field: String,
+    /// Edge table column holding destination vertex ids.
+    pub dst_field: String,
+    /// Traversal direction.
+    pub direction: NativeDirection,
+    /// Qualified column in `input` carrying the source vertex id (e.g. `a__id`).
+    pub source_id_column: String,
+    /// Qualified output column for the neighbor row id (e.g. `b__id`).
+    pub neighbor_column: String,
+    /// Arrow data type of the neighbor column (target id field's type).
+    pub neighbor_data_type: DataType,
+    /// Output schema = input schema + neighbor column.
+    pub schema: DFSchemaRef,
+}
+
+impl PartialOrd for CsrExpandNode {
+    /// Orders nodes by their string metadata fields only.
+    ///
+    /// A tie on those fields is reported as `Equal` only when the two nodes are
+    /// fully equal, so `partial_cmp` stays consistent with the derived
+    /// `PartialEq`; otherwise the nodes are unordered and `None` is returned.
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        // Order by stable, comparable fields only.
+        (
+            &self.rel_type,
+            &self.src_field,
+            &self.dst_field,
+            &self.source_id_column,
+            &self.neighbor_column,
+        )
+            .partial_cmp(&(
+                &other.rel_type,
+                &other.src_field,
+                &other.dst_field,
+                &other.source_id_column,
+                &other.neighbor_column,
+            ))
+            // Fields left out of the tuple (input, direction, data type, schema)
+            // may still differ; never claim `Equal` for nodes that are `!=`.
+            .filter(|ord| *ord != std::cmp::Ordering::Equal || self == other)
+    }
+}
+
+impl UserDefinedLogicalNodeCore for CsrExpandNode {
+    fn name(&self) -> &str {
+        "CsrExpand"
+    }
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.input]
+    }
+    fn schema(&self) -> &DFSchemaRef {
+        &self.schema
+    }
+    fn expressions(&self) -> Vec<Expr> {
+        vec![]
+    }
+    fn fmt_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(
+            f,
+            "CsrExpand: rel={}, dir={:?}, src={}, neighbor={}",
+            self.rel_type, self.direction, self.source_id_column, self.neighbor_column
+        )
+    }
+    fn with_exprs_and_inputs(
+        &self,
+        _exprs: Vec<Expr>,
+        mut inputs: Vec<LogicalPlan>,
+    ) -> DFResult<Self> {
+        Ok(Self {
+            input: inputs.remove(0),
+            ..self.clone()
+        })
+    }
+}
+
+/// Physical operator for `CsrExpandNode`.
+#[derive(Debug)]
+pub struct CsrExpandExec {
+    input: Arc<dyn ExecutionPlan>,
+    csr: Arc<CsrIndex>,
+    source_id_idx: usize,
+    neighbor_field: Field,
+    out_schema: SchemaRef,
+    props: PlanProperties,
+}
+
+impl CsrExpandExec {
+    /// Creates the operator over `input`.
+    ///
+    /// `source_id_idx` is the index of the source-id column in `input`'s
+    /// schema, `neighbor_field` describes the appended neighbor column, and
+    /// `out_schema` is the schema of the batches this operator emits.
+    pub fn new(
+        input: Arc<dyn ExecutionPlan>,
+        csr: Arc<CsrIndex>,
+        source_id_idx: usize,
+        neighbor_field: Field,
+        out_schema: SchemaRef,
+    ) -> Self {
+        // `execute(partition, ..)` forwards `partition` to the input, so this
+        // operator has exactly as many partitions as its input. Declaring a
+        // single partition would leave input partitions 1.. unread.
+        let partition_count = input.properties().output_partitioning().partition_count();
+        let props = PlanProperties::new(
+            EquivalenceProperties::new(out_schema.clone()),
+            Partitioning::UnknownPartitioning(partition_count),
+            EmissionType::Incremental,
+            Boundedness::Bounded,
+        );
+        Self {
+            input,
+            csr,
+            source_id_idx,
+            neighbor_field,
+            out_schema,
+            props,
+        }
+    }
+}
+
+impl DisplayAs for CsrExpandExec {
+    fn fmt_as(&self, _t: DisplayFormatType, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "CsrExpandExec: neighbor={}", self.neighbor_field.name())
+    }
+}
+
+impl ExecutionPlan for CsrExpandExec {
+    fn name(&self) -> &str {
+        "CsrExpandExec"
+    }
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+    fn properties(&self) -> &PlanProperties {
+        &self.props
+    }
+    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
+        vec![&self.input]
+    }
+    fn with_new_children(
+        self: Arc<Self>,
+        children: Vec<Arc<dyn ExecutionPlan>>,
+    ) -> DFResult<Arc<dyn ExecutionPlan>> {
+        Ok(Arc::new(CsrExpandExec::new(
+            children[0].clone(),
+            self.csr.clone(),
+            self.source_id_idx,
+            self.neighbor_field.clone(),
+            self.out_schema.clone(),
+        )))
+    }
+    fn execute(
+        &self,
+        partition: usize,
+        context: Arc<TaskContext>,
+    ) -> DFResult<SendableRecordBatchStream> {
+        let input = self.input.execute(partition, context)?;
+        let csr = self.csr.clone();
+        let idx = self.source_id_idx;
+        let field = self.neighbor_field.clone();
+        let out_schema = self.out_schema.clone();
+        let out_schema_for_stream = out_schema.clone();
+        let stream = input.map(move |rb| {
+            let rb = rb?;
+            expand_batch(&rb, idx, &csr, &field, &out_schema)
+                .map_err(|e| datafusion::error::DataFusionError::Execution(e.to_string()))
+        });
+        Ok(Box::pin(RecordBatchStreamAdapter::new(
+            out_schema_for_stream,
+            stream,
+        )))
+    }
+}
+```
+
+- [ ] **Step 4: Build and run tests**
+
+Run: `cargo test -p lance-graph csr_expand`
+Expected: PASS (3 tests; node/exec compile).
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add crates/lance-graph/src/lance_native_planner/
+git commit -m "feat(native): CsrExpandNode/Exec and expand_batch core"
+```
+
+---
+
+## Task 4: `RowMaterializer` + `take_batch` + `LanceTakeNode` + `LanceTakeExec`
+
+**Files:**
+- Create: `crates/lance-graph/src/lance_native_planner/take.rs`
+- Modify: `crates/lance-graph/src/lance_native_planner/mod.rs` (add `mod take;`)
+
+- [ ] **Step 1: Write the materializer, `take_batch`, and their tests**
+
+Create `crates/lance-graph/src/lance_native_planner/take.rs`:
+
+```rust
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The Lance Authors
+
+//! Native materialization: take target node columns by row id.
+//!
+//! `CsrExpandExec` produces target *row ids*; `LanceTakeExec` turns those into
+//! target *properties* via a `RowMaterializer`. Under the dense-ROWID model the
+//! in-memory materializer is a direct `arrow::compute::take` by offset — the
+//! concrete reason CSR beats a hash join.
+
+use std::any::Any;
+use std::fmt;
+use std::sync::Arc;
+
+use arrow::compute::{cast, take};
+use arrow_array::{ArrayRef, RecordBatch, UInt64Array};
+use arrow_schema::{DataType, Field, Schema, SchemaRef};
+use datafusion::common::{DFSchemaRef, Result as DFResult};
+use datafusion::execution::TaskContext;
+use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
+use datafusion::physical_expr::EquivalenceProperties;
+use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType};
+use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
+use datafusion::physical_plan::{
+    DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties,
+    SendableRecordBatchStream,
+};
+use futures::StreamExt;
+
+use crate::error::{GraphError, Result};
+
+/// Materializes rows of a target node table by row id.
+pub trait RowMaterializer: Send + Sync + fmt::Debug {
+    /// Take `columns` (raw, unqualified names) for the given `row_ids`.
+    /// The returned batch has one row per element of `row_ids`, columns in the
+    /// requested order, named by their raw names.
+    fn take(&self, row_ids: &UInt64Array, columns: &[String]) -> Result<RecordBatch>;
+}
+
+/// In-memory materializer over a fully-collected target node batch. Row id ==
+/// offset into the batch (dense-ROWID model).
+#[derive(Debug)]
+pub struct InMemoryMaterializer {
+    batch: RecordBatch,
+}
+
+impl InMemoryMaterializer {
+    pub fn new(batch: RecordBatch) -> Self {
+        Self { batch }
+    }
+}
+
+impl RowMaterializer for InMemoryMaterializer {
+    /// Gathers `columns` from the collected batch at the offsets in `row_ids`.
+    ///
+    /// Returns one row per element of `row_ids`, with columns in the requested
+    /// order under their raw names; every output field is marked nullable.
+    ///
+    /// # Errors
+    /// Returns `GraphError::ExecutionError` when a requested column is missing,
+    /// when a row id is out of bounds for the batch, or when the output batch
+    /// cannot be assembled.
+    fn take(&self, row_ids: &UInt64Array, columns: &[String]) -> Result<RecordBatch> {
+        let mut fields: Vec<Field> = Vec::with_capacity(columns.len());
+        let mut arrays: Vec<ArrayRef> = Vec::with_capacity(columns.len());
+        for name in columns {
+            let col = self
+                .batch
+                .column_by_name(name)
+                .ok_or_else(|| GraphError::ExecutionError {
+                    message: format!("take: target column '{}' not found", name),
+                    location: snafu::Location::new(file!(), line!(), column!()),
+                })?;
+            // Without `check_bounds` the kernel panics on an out-of-range index;
+            // a dangling row id should surface as an error instead.
+            let checked = arrow::compute::TakeOptions { check_bounds: true };
+            let taken = take(col, row_ids, Some(checked)).map_err(|e| {
+                GraphError::ExecutionError {
+                    message: format!("take: failed on column '{}': {}", name, e),
+                    location: snafu::Location::new(file!(), line!(), column!()),
+                }
+            })?;
+            fields.push(Field::new(name, col.data_type().clone(), true));
+            arrays.push(taken);
+        }
+        RecordBatch::try_new(Arc::new(Schema::new(fields)), arrays).map_err(|e| {
+            GraphError::ExecutionError {
+                message: format!("take: build batch: {}", e),
+                location: snafu::Location::new(file!(), line!(), column!()),
+            }
+        })
+    }
+}
+
+/// Append materialized target columns to one input batch.
+///
+/// `row_id_idx` is the index of the row-id column in `input`. `take_cols` are
+/// the raw target column names to materialize, in the same order as the
+/// appended fields of `out_schema`. `out_schema` = `input.schema()` followed by
+/// the qualified materialized columns.
+pub(crate) fn take_batch(
+    input: &RecordBatch,
+    row_id_idx: usize,
+    materializer: &dyn RowMaterializer,
+    take_cols: &[String],
+    out_schema: &SchemaRef,
+) -> Result<RecordBatch> {
+    let ids_u64 = cast(input.column(row_id_idx), &DataType::UInt64).map_err(|e| {
+        GraphError::ExecutionError {
+            message: format!("take: cast row id to u64: {}", e),
+            location: snafu::Location::new(file!(), line!(), column!()),
+        }
+    })?;
+    let ids = ids_u64
+        .as_any()
+        .downcast_ref::<UInt64Array>()
+        .expect("cast to UInt64 yields UInt64Array");
+
+    let materialized = materializer.take(ids, take_cols)?;
+
+    let mut cols: Vec<ArrayRef> = input.columns().to_vec();
+    cols.extend(materialized.columns().iter().cloned());
+
+    RecordBatch::try_new(out_schema.clone(), cols).map_err(|e| GraphError::ExecutionError {
+        message: format!("take: build output batch: {}", e),
+        location: snafu::Location::new(file!(), line!(), column!()),
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use arrow_array::StringArray;
+
+    fn target_batch() -> RecordBatch {
+        // person table: id, name, age (raw, lowercased column names)
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("id", DataType::UInt64, false),
+            Field::new("name", DataType::Utf8, false),
+            Field::new("age", DataType::Int64, false),
+        ]));
+        RecordBatch::try_new(
+            schema,
+            vec![
+                Arc::new(UInt64Array::from(vec![0u64, 1, 2])),
+                Arc::new(StringArray::from(vec!["alice", "bob", "carol"])),
+                Arc::new(arrow_array::Int64Array::from(vec![30i64, 40, 50])),
+            ],
+        )
+        .unwrap()
+    }
+
+    #[test]
+    fn test_in_memory_materializer_take_subset() {
+        let m = InMemoryMaterializer::new(target_batch());
+        let ids = UInt64Array::from(vec![2u64, 0]);
+        let out = m.take(&ids, &["name".to_string()]).unwrap();
+        assert_eq!(out.num_columns(), 1);
+        let names = out.column(0).as_any().downcast_ref::<StringArray>().unwrap();
+        assert_eq!(names.value(0), "carol");
+        assert_eq!(names.value(1), "alice");
+    }
+
+    #[test]
+    fn test_in_memory_materializer_missing_column_errors() {
+        let m = InMemoryMaterializer::new(target_batch());
+        let ids = UInt64Array::from(vec![0u64]);
+        assert!(m.take(&ids, &["nonexistent".to_string()]).is_err());
+    }
+
+    #[test]
+    fn test_take_batch_appends_qualified_columns() {
+        // input: a__name, b__id  (b__id is the neighbor row id)
+        let in_schema = Arc::new(Schema::new(vec![
+            Field::new("a__name", DataType::Utf8, false),
+            Field::new("b__id", DataType::UInt64, true),
+        ]));
+        let input = RecordBatch::try_new(
+            in_schema,
+            vec![
+                Arc::new(StringArray::from(vec!["x", "y"])),
+                Arc::new(UInt64Array::from(vec![1u64, 2])),
+            ],
+        )
+        .unwrap();
+        // out: a__name, b__id, b__name, b__age
+        let out_schema = Arc::new(Schema::new(vec![
+            Field::new("a__name", DataType::Utf8, false),
+            Field::new("b__id", DataType::UInt64, true),
+            Field::new("b__name", DataType::Utf8, true),
+            Field::new("b__age", DataType::Int64, true),
+        ]));
+        let m = InMemoryMaterializer::new(target_batch());
+        let out = take_batch(
+            &input,
+            1,
+            &m,
+            &["name".to_string(), "age".to_string()],
+            &out_schema,
+        )
+        .unwrap();
+        assert_eq!(out.num_rows(), 2);
+        let b_name = out.column(2).as_any().downcast_ref::<StringArray>().unwrap();
+        let b_age = out
+            .column(3)
+            .as_any()
+            .downcast_ref::<arrow_array::Int64Array>()
+            .unwrap();
+        assert_eq!(b_name.value(0), "bob"); // row id 1
+        assert_eq!(b_name.value(1), "carol"); // row id 2
+        assert_eq!(b_age.values(), &[40, 50]);
+    }
+}
+```
+
+Add `mod take;` to `crates/lance-graph/src/lance_native_planner/mod.rs`.
+
+- [ ] **Step 2: Run tests to verify they pass**
+
+Run: `cargo test -p lance-graph -- take::tests`
+Expected: PASS (3 tests).
+
+- [ ] **Step 3: Add the logical node and physical operator**
+
+Append to `crates/lance-graph/src/lance_native_planner/take.rs` (after `take_batch`, before `#[cfg(test)]`):
+
+```rust
+/// Logical extension node for materializing target columns via take().
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct LanceTakeNode {
+    /// Input subplan (a `CsrExpandNode`).
+    pub input: LogicalPlan,
+    /// Lowercased target node table name (to collect rows from).
+    pub target_table: String,
+    /// Qualified column in `input` holding the row ids (e.g. `b__id`).
+    pub row_id_column: String,
+    /// Raw (unqualified, lowercased) target columns to materialize, in output order.
+    pub take_cols: Vec<String>,
+    /// Output schema = input schema + qualified materialized columns.
+    pub schema: DFSchemaRef,
+}
+
+impl PartialOrd for LanceTakeNode {
+    /// Orders nodes by target table, row-id column, and take columns.
+    ///
+    /// A tie on those fields is reported as `Equal` only when the two nodes are
+    /// fully equal, so `partial_cmp` stays consistent with the derived
+    /// `PartialEq`; otherwise the nodes are unordered and `None` is returned.
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        (&self.target_table, &self.row_id_column, &self.take_cols)
+            .partial_cmp(&(
+                &other.target_table,
+                &other.row_id_column,
+                &other.take_cols,
+            ))
+            // `input` and `schema` are not part of the tuple and may still
+            // differ; never claim `Equal` for nodes that are `!=`.
+            .filter(|ord| *ord != std::cmp::Ordering::Equal || self == other)
+    }
+}
+
+impl UserDefinedLogicalNodeCore for LanceTakeNode {
+    fn name(&self) -> &str {
+        "LanceTake"
+    }
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.input]
+    }
+    fn schema(&self) -> &DFSchemaRef {
+        &self.schema
+    }
+    fn expressions(&self) -> Vec<Expr> {
+        vec![]
+    }
+    fn fmt_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(
+            f,
+            "LanceTake: table={}, row_id={}, cols={:?}",
+            self.target_table, self.row_id_column, self.take_cols
+        )
+    }
+    fn with_exprs_and_inputs(
+        &self,
+        _exprs: Vec<Expr>,
+        mut inputs: Vec<LogicalPlan>,
+    ) -> DFResult<Self> {
+        Ok(Self {
+            input: inputs.remove(0),
+            ..self.clone()
+        })
+    }
+}
+
+/// Physical operator for `LanceTakeNode`.
+#[derive(Debug)]
+pub struct LanceTakeExec {
+    input: Arc<dyn ExecutionPlan>,
+    materializer: Arc<dyn RowMaterializer>,
+    row_id_idx: usize,
+    take_cols: Vec<String>,
+    out_schema: SchemaRef,
+    props: PlanProperties,
+}
+
+impl LanceTakeExec {
+    /// Creates the operator over `input`.
+    ///
+    /// `row_id_idx` is the index of the row-id column in `input`'s schema,
+    /// `take_cols` are the raw target column names to materialize, and
+    /// `out_schema` is the schema of the batches this operator emits.
+    pub fn new(
+        input: Arc<dyn ExecutionPlan>,
+        materializer: Arc<dyn RowMaterializer>,
+        row_id_idx: usize,
+        take_cols: Vec<String>,
+        out_schema: SchemaRef,
+    ) -> Self {
+        // `execute(partition, ..)` forwards `partition` to the input, so this
+        // operator has exactly as many partitions as its input. Declaring a
+        // single partition would leave input partitions 1.. unread.
+        let partition_count = input.properties().output_partitioning().partition_count();
+        let props = PlanProperties::new(
+            EquivalenceProperties::new(out_schema.clone()),
+            Partitioning::UnknownPartitioning(partition_count),
+            EmissionType::Incremental,
+            Boundedness::Bounded,
+        );
+        Self {
+            input,
+            materializer,
+            row_id_idx,
+            take_cols,
+            out_schema,
+            props,
+        }
+    }
+}
+
+impl DisplayAs for LanceTakeExec {
+    fn fmt_as(&self, _t: DisplayFormatType, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "LanceTakeExec: cols={:?}", self.take_cols)
+    }
+}
+
+impl ExecutionPlan for LanceTakeExec {
+    fn name(&self) -> &str {
+        "LanceTakeExec"
+    }
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+    fn properties(&self) -> &PlanProperties {
+        &self.props
+    }
+    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
+        vec![&self.input]
+    }
+    fn with_new_children(
+        self: Arc<Self>,
+        children: Vec<Arc<dyn ExecutionPlan>>,
+    ) -> DFResult<Arc<dyn ExecutionPlan>> {
+        Ok(Arc::new(LanceTakeExec::new(
+            children[0].clone(),
+            self.materializer.clone(),
+            self.row_id_idx,
+            self.take_cols.clone(),
+            self.out_schema.clone(),
+        )))
+    }
+    fn execute(
+        &self,
+        partition: usize,
+        context: Arc<TaskContext>,
+    ) -> DFResult<SendableRecordBatchStream> {
+        let input = self.input.execute(partition, context)?;
+        let materializer = self.materializer.clone();
+        let row_id_idx = self.row_id_idx;
+        let take_cols = self.take_cols.clone();
+        let out_schema = self.out_schema.clone();
+        let out_schema_for_stream = out_schema.clone();
+        let stream = input.map(move |rb| {
+            let rb = rb?;
+            take_batch(&rb, row_id_idx, materializer.as_ref(), &take_cols, &out_schema)
+                .map_err(|e| datafusion::error::DataFusionError::Execution(e.to_string()))
+        });
+        Ok(Box::pin(RecordBatchStreamAdapter::new(
+            out_schema_for_stream,
+            stream,
+        )))
+    }
+}
+```
+
+- [ ] **Step 4: Build and run tests**
+
+Run: `cargo test -p lance-graph -- take::tests`
+Expected: PASS (3 tests; node/exec compile).
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add crates/lance-graph/src/lance_native_planner/
+git commit -m "feat(native): RowMaterializer, LanceTakeNode/Exec, take_batch core"
+```
+
+---
+
+## Task 5: `CsrExtensionPlanner` + `CsrQueryPlanner`
+
+Turns the two logical nodes into physical operators, building the `CsrIndex` and `InMemoryMaterializer` from tables registered on the session.
+
+**Files:**
+- Create: `crates/lance-graph/src/lance_native_planner/extension_planner.rs`
+- Modify: `crates/lance-graph/src/lance_native_planner/mod.rs` (add `mod extension_planner; pub use extension_planner::CsrQueryPlanner;`)
+
+- [ ] **Step 1: Create the extension planner and query planner**
+
+Create `crates/lance-graph/src/lance_native_planner/extension_planner.rs`:
+
+```rust
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The Lance Authors
+
+//! Physical planning for the native CSR extension nodes.
+//!
+//! `CsrQueryPlanner` is registered on the execution `SessionContext`. It runs
+//! the `DefaultPhysicalPlanner` with `CsrExtensionPlanner`, which builds the
+//! `CsrIndex` (from the edge table) and the `InMemoryMaterializer` (from the
+//! target node table) at physical-planning time.
+
+use std::sync::Arc;
+
+use arrow::compute::concat_batches;
+use arrow_schema::Field;
+use async_trait::async_trait;
+use datafusion::common::Result as DFResult;
+use datafusion::error::DataFusionError;
+use datafusion::execution::context::{QueryPlanner, SessionContext, SessionState};
+use datafusion::execution::TaskContext;
+use datafusion::logical_expr::{LogicalPlan, UserDefinedLogicalNode};
+use datafusion::physical_plan::ExecutionPlan;
+use datafusion::physical_planner::{
+    DefaultPhysicalPlanner, ExtensionPlanner, PhysicalPlanner,
+};
+
+use super::csr_expand::{CsrExpandExec, CsrExpandNode};
+use super::direction::NativeDirection;
+use super::take::{CsrExtensionMaterializer, InMemoryMaterializer, LanceTakeExec, LanceTakeNode};
+use crate::csr_index::CsrIndexBuilder;
+
+/// Collect a registered table to a single `RecordBatch`.
+async fn collect_table(
+    session_state: &SessionState,
+    table: &str,
+) -> DFResult<arrow_array::RecordBatch> {
+    let ctx = SessionContext::new_with_state(session_state.clone());
+    let df = ctx.table(table).await?;
+    let schema = df.schema().inner().clone();
+    let batches = df.collect().await?;
+    concat_batches(&schema, &batches).map_err(|e| DataFusionError::Execution(e.to_string()))
+}
+
+/// Extension planner that lowers `CsrExpandNode` and `LanceTakeNode`.
+#[derive(Debug)]
+pub struct CsrExtensionPlanner;
+
+#[async_trait]
+impl ExtensionPlanner for CsrExtensionPlanner {
+    /// Lowers `CsrExpandNode` to `CsrExpandExec` and `LanceTakeNode` to
+    /// `LanceTakeExec`; returns `Ok(None)` for any other extension node.
+    ///
+    /// The edge table (for expand) or target node table (for take) is collected
+    /// from `session_state` here, at physical-planning time.
+    async fn plan_extension(
+        &self,
+        _planner: &dyn PhysicalPlanner,
+        node: &dyn UserDefinedLogicalNode,
+        _logical_inputs: &[&LogicalPlan],
+        physical_inputs: &[Arc<dyn ExecutionPlan>],
+        session_state: &SessionState,
+    ) -> DFResult<Option<Arc<dyn ExecutionPlan>>> {
+        if let Some(expand) = node.as_any().downcast_ref::<CsrExpandNode>() {
+            let input = physical_inputs[0].clone();
+
+            // Build CSR from the edge table (reverse columns for Incoming).
+            let edges = collect_table(session_state, &expand.rel_type).await?;
+            let (src_col, dst_col) = match expand.direction {
+                NativeDirection::Outgoing => (&expand.src_field, &expand.dst_field),
+                NativeDirection::Incoming => (&expand.dst_field, &expand.src_field),
+            };
+            let csr = CsrIndexBuilder::new()
+                .add_edges_from_batch_with_columns(&edges, src_col, dst_col)
+                .map_err(|e| DataFusionError::Execution(e.to_string()))?
+                .build();
+
+            let in_schema = input.schema();
+            let source_id_idx = in_schema
+                .index_of(&expand.source_id_column)
+                .map_err(|e| DataFusionError::Execution(format!(
+                    "CsrExpand: source id column '{}' not found in input: {}",
+                    expand.source_id_column, e
+                )))?;
+            let neighbor_field = Field::new(
+                &expand.neighbor_column,
+                expand.neighbor_data_type.clone(),
+                true,
+            );
+            let out_schema = expand.schema.inner().clone();
+
+            return Ok(Some(Arc::new(CsrExpandExec::new(
+                input,
+                Arc::new(csr),
+                source_id_idx,
+                neighbor_field,
+                out_schema,
+            ))));
+        }
+
+        if let Some(take) = node.as_any().downcast_ref::<LanceTakeNode>() {
+            let input = physical_inputs[0].clone();
+
+            let target = collect_table(session_state, &take.target_table).await?;
+            let materializer = Arc::new(InMemoryMaterializer::new(target));
+
+            let in_schema = input.schema();
+            let row_id_idx = in_schema
+                .index_of(&take.row_id_column)
+                .map_err(|e| DataFusionError::Execution(format!(
+                    "LanceTake: row id column '{}' not found in input: {}",
+                    take.row_id_column, e
+                )))?;
+            let out_schema = take.schema.inner().clone();
+
+            return Ok(Some(Arc::new(LanceTakeExec::new(
+                input,
+                // The alias is already `dyn RowMaterializer`; writing
+                // `Arc<dyn CsrExtensionMaterializer>` would not compile.
+                materializer as Arc<CsrExtensionMaterializer>,
+                row_id_idx,
+                take.take_cols.clone(),
+                out_schema,
+            ))));
+        }
+
+        Ok(None)
+    }
+}
+
+/// Query planner that installs `CsrExtensionPlanner`.
+#[derive(Debug, Default)]
+pub struct CsrQueryPlanner;
+
+impl CsrQueryPlanner {
+    pub fn new() -> Self {
+        Self
+    }
+}
+
+#[async_trait]
+impl QueryPlanner for CsrQueryPlanner {
+    async fn create_physical_plan(
+        &self,
+        logical_plan: &LogicalPlan,
+        session_state: &SessionState,
+    ) -> DFResult<Arc<dyn ExecutionPlan>> {
+        let planner =
+            DefaultPhysicalPlanner::with_extension_planners(vec![Arc::new(CsrExtensionPlanner)]);
+        planner
+            .create_physical_plan(logical_plan, session_state)
+            .await
+    }
+}
+
+// Silence unused import in some build configs.
+#[allow(unused_imports)]
+use TaskContext as _TaskContext;
+```
+
+> Note: `CsrExtensionMaterializer` referenced above is a type alias for
+> `dyn RowMaterializer` used to keep the trait-object cast explicit. Add to
+> `take.rs` (top level, after the `RowMaterializer` trait):
+> ```rust
+> /// Convenience alias for the boxed materializer trait object.
+> pub type CsrExtensionMaterializer = dyn RowMaterializer;
+> ```
+> `LanceTakeExec::new` and its field keep the type `Arc<dyn RowMaterializer>`: the
+> alias already includes `dyn`, so `Arc<CsrExtensionMaterializer>` is the same type
+> and `Arc<dyn CsrExtensionMaterializer>` does not compile. No signature change is
+> needed — pass `materializer` directly without the `as` cast if simpler.
+
+- [ ] **Step 2: Update mod.rs**
+
+Add to `crates/lance-graph/src/lance_native_planner/mod.rs`:
+
+```rust
+mod extension_planner;
+
+pub use extension_planner::CsrQueryPlanner;
+```
+
+- [ ] **Step 3: Build**
+
+Run: `cargo build -p lance-graph`
+Expected: builds. If the `CsrExtensionMaterializer` alias causes friction, delete the alias, its import in `extension_planner.rs`, and the `as Arc<…>` cast on `materializer`, then pass `materializer` directly (it is already `Arc<InMemoryMaterializer>`; coercion to `Arc<dyn RowMaterializer>` is automatic at the call site because `LanceTakeExec::new` takes `Arc<dyn RowMaterializer>`).
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add crates/lance-graph/src/lance_native_planner/
+git commit -m "feat(native): CsrExtensionPlanner + CsrQueryPlanner physical planning"
+```
+
+---
+
+## Task 6: `LanceNativePlanner` lowering + fallback
+
+Rewrites `mod.rs` to lower a supported single-hop `Expand` onto the extension nodes and delegate everything else to `DataFusionPlanner`.
+
+**Files:**
+- Modify: `crates/lance-graph/src/lance_native_planner/mod.rs`
+- Modify: `crates/lance-graph/src/datafusion_planner/mod.rs:20` (`mod expression;` → `pub(crate) mod expression;`)
+
+- [ ] **Step 1: Expose the expression helpers**
+
+In `crates/lance-graph/src/datafusion_planner/mod.rs`, change:
+
+```rust
+mod expression;
+```
+to:
+```rust
+pub(crate) mod expression;
+```
+
+- [ ] **Step 2: Write the failing planner tests**
+
+Replace the body of `crates/lance-graph/src/lance_native_planner/mod.rs` (keep the
+`mod`/`pub use` declarations from Tasks 2–5) so the file is:
+
+```rust
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The Lance Authors
+
+//! Lance Native physical planner.
+//!
+//! Lowers a supported single-hop `Expand` onto CSR-backed extension nodes
+//! (`CsrExpandNode` + `LanceTakeNode`). Any plan it cannot serve natively is
+//! delegated wholesale to `DataFusionPlanner`, so `LanceNative` execution is
+//! always correct on valid Cypher — it simply uses joins when CSR cannot help.
+
+mod csr_expand;
+mod direction;
+mod extension_planner;
+mod take;
+
+pub use direction::NativeDirection;
+pub use extension_planner::CsrQueryPlanner;
+
+use std::sync::Arc;
+
+use datafusion::common::DFSchema;
+use datafusion::logical_expr::{Expr, Extension, LogicalPlan, LogicalPlanBuilder};
+use arrow_schema::{Field, Schema};
+
+use crate::ast::RelationshipDirection;
+use crate::case_insensitive::qualify_column;
+use crate::config::GraphConfig;
+use crate::datafusion_planner::expression::{
+    to_df_boolean_expr, to_df_value_expr,
+};
+use crate::datafusion_planner::{
+    analysis, DataFusionPlanner, GraphPhysicalPlanner, PlanningContext,
+};
+use crate::error::{GraphError, Result};
+use crate::logical_plan::{LogicalOperator, ProjectionItem, SortItem};
+
+use csr_expand::CsrExpandNode;
+// `NativeDirection` is already in scope via the `pub use` above (a second `use` is E0252).
+use take::LanceTakeNode;
+
+/// Lance-native planner: CSR single-hop expand with DataFusion fallback.
+pub struct LanceNativePlanner {
+    config: GraphConfig,
+    df: DataFusionPlanner,
+}
+
+impl LanceNativePlanner {
+    pub fn new(config: GraphConfig) -> Self {
+        Self {
+            df: DataFusionPlanner::new(config.clone()),
+            config,
+        }
+    }
+
+    pub fn with_catalog(
+        config: GraphConfig,
+        catalog: Arc<dyn lance_graph_catalog::GraphSourceCatalog>,
+    ) -> Self {
+        Self {
+            df: DataFusionPlanner::with_catalog(config.clone(), catalog),
+            config,
+        }
+    }
+}
+
+impl GraphPhysicalPlanner for LanceNativePlanner {
+    fn plan(&self, logical_plan: &LogicalOperator) -> Result<LogicalPlan> {
+        if !can_plan_natively(logical_plan) {
+            return self.df.plan(logical_plan);
+        }
+        let analysis = analysis::analyze(logical_plan)?;
+        let mut ctx = PlanningContext::new(&analysis);
+        self.build_native(&mut ctx, logical_plan)
+    }
+}
+
+impl LanceNativePlanner {
+    /// Build the native plan for a supported tree. Unary operators above the
+    /// single expand are rebuilt on the native child; the expand itself lowers
+    /// to `CsrExpandNode` + `LanceTakeNode`.
+    fn build_native(
+        &self,
+        ctx: &mut PlanningContext,
+        op: &LogicalOperator,
+    ) -> Result<LogicalPlan> {
+        match op {
+            LogicalOperator::ScanByLabel {
+                variable,
+                label,
+                properties,
+            } => self.df.build_scan(ctx, variable, label, properties),
+
+            LogicalOperator::Filter { input, predicate } => {
+                let child = self.build_native(ctx, input)?;
+                let expr = to_df_boolean_expr(predicate);
+                LogicalPlanBuilder::from(child)
+                    .filter(expr)
+                    .map_err(|e| self.plan_err("filter", e))?
+                    .build()
+                    .map_err(|e| self.plan_err("filter build", e))
+            }
+
+            LogicalOperator::Project { input, projections } => {
+                let child = self.build_native(ctx, input)?;
+                self.build_project_on(child, projections)
+            }
+
+            LogicalOperator::Sort { input, sort_items } => {
+                let child = self.build_native(ctx, input)?;
+                self.build_sort_on(child, sort_items)
+            }
+
+            LogicalOperator::Limit { input, count } => {
+                let child = self.build_native(ctx, input)?;
+                LogicalPlanBuilder::from(child)
+                    .limit(0, Some(*count as usize))
+                    .map_err(|e| self.plan_err("limit", e))?
+                    .build()
+                    .map_err(|e| self.plan_err("limit build", e))
+            }
+
+            LogicalOperator::Offset { input, offset } => {
+                let child = self.build_native(ctx, input)?;
+                LogicalPlanBuilder::from(child)
+                    .limit(*offset as usize, None)
+                    .map_err(|e| self.plan_err("offset", e))?
+                    .build()
+                    .map_err(|e| self.plan_err("offset build", e))
+            }
+
+            LogicalOperator::Distinct { input } => {
+                let child = self.build_native(ctx, input)?;
+                LogicalPlanBuilder::from(child)
+                    .distinct()
+                    .map_err(|e| self.plan_err("distinct", e))?
+                    .build()
+                    .map_err(|e| self.plan_err("distinct build", e))
+            }
+
+            LogicalOperator::Expand {
+                input,
+                source_variable,
+                target_variable,
+                target_label,
+                relationship_types,
+                direction,
+                ..
+            } => self.build_expand_native(
+                ctx,
+                input,
+                source_variable,
+                target_variable,
+                target_label,
+                relationship_types,
+                direction,
+            ),
+
+            // Unsupported here would have been rejected by can_plan_natively.
+            other => Err(GraphError::PlanError {
+                message: format!("native planner reached unsupported operator: {:?}", other),
+                location: snafu::Location::new(file!(), line!(), column!()),
+            }),
+        }
+    }
+
+    #[allow(clippy::too_many_arguments)]
+    fn build_expand_native(
+        &self,
+        ctx: &mut PlanningContext,
+        input: &LogicalOperator,
+        source_variable: &str,
+        target_variable: &str,
+        target_label: &str,
+        relationship_types: &[String],
+        direction: &RelationshipDirection,
+    ) -> Result<LogicalPlan> {
+        let source_plan = self.build_native(ctx, input)?;
+
+        let rel_type = &relationship_types[0];
+        let rel_map = self.config.get_relationship_mapping(rel_type).ok_or_else(|| {
+            GraphError::ConfigError {
+                message: format!("No relationship mapping for '{}'", rel_type),
+                location: snafu::Location::new(file!(), line!(), column!()),
+            }
+        })?;
+
+        let src_label = ctx
+            .analysis
+            .var_to_label
+            .get(source_variable)
+            .ok_or_else(|| GraphError::PlanError {
+                message: format!("No label for source variable '{}'", source_variable),
+                location: snafu::Location::new(file!(), line!(), column!()),
+            })?;
+        let src_node = self.config.get_node_mapping(src_label).ok_or_else(|| {
+            GraphError::ConfigError {
+                message: format!("No node mapping for label '{}'", src_label),
+                location: snafu::Location::new(file!(), line!(), column!()),
+            }
+        })?;
+        let tgt_node = self.config.get_node_mapping(target_label).ok_or_else(|| {
+            GraphError::ConfigError {
+                message: format!("No node mapping for label '{}'", target_label),
+                location: snafu::Location::new(file!(), line!(), column!()),
+            }
+        })?;
+
+        let catalog = self.df.catalog_ref().ok_or_else(|| GraphError::ConfigError {
+            message: "LanceNativePlanner requires a catalog for native expand".to_string(),
+            location: snafu::Location::new(file!(), line!(), column!()),
+        })?;
+        let tgt_source = catalog.node_source(target_label).ok_or_else(|| {
+            GraphError::ConfigError {
+                message: format!("No table source for target label '{}'", target_label),
+                location: snafu::Location::new(file!(), line!(), column!()),
+            }
+        })?;
+        let tgt_arrow = tgt_source.schema();
+
+        let direction = match direction {
+            RelationshipDirection::Outgoing => NativeDirection::Outgoing,
+            RelationshipDirection::Incoming => NativeDirection::Incoming,
+            RelationshipDirection::Undirected => {
+                return Err(GraphError::PlanError {
+                    message: "undirected expand is not natively supported".to_string(),
+                    location: snafu::Location::new(file!(), line!(), column!()),
+                });
+            }
+        };
+
+        let source_id_column = qualify_column(source_variable, &src_node.id_field);
+        let neighbor_column = qualify_column(target_variable, &tgt_node.id_field);
+        let neighbor_data_type = tgt_arrow
+            .field_with_name(&tgt_node.id_field.to_lowercase())
+            .map_err(|e| GraphError::ConfigError {
+                message: format!(
+                    "target id field '{}' not found in '{}': {}",
+                    tgt_node.id_field, target_label, e
+                ),
+                location: snafu::Location::new(file!(), line!(), column!()),
+            })?
+            .data_type()
+            .clone();
+
+        // CsrExpandNode output schema = source schema + neighbor column.
+        let src_arrow = source_plan.schema().inner();
+        let mut expand_fields: Vec<Field> =
+            src_arrow.fields().iter().map(|f| f.as_ref().clone()).collect();
+        expand_fields.push(Field::new(
+            &neighbor_column,
+            neighbor_data_type.clone(),
+            true,
+        ));
+        let expand_arrow = Schema::new(expand_fields);
+        let expand_schema = Arc::new(
+            DFSchema::try_from(expand_arrow).map_err(|e| self.plan_err("expand schema", e))?,
+        );
+
+        let expand_node = CsrExpandNode {
+            input: source_plan,
+            rel_type: rel_type.to_lowercase(),
+            src_field: rel_map.source_id_field.to_lowercase(),
+            dst_field: rel_map.target_id_field.to_lowercase(),
+            direction,
+            source_id_column,
+            neighbor_column: neighbor_column.clone(),
+            neighbor_data_type,
+            schema: expand_schema.clone(),
+        };
+        let expand_plan = LogicalPlan::Extension(Extension {
+            node: Arc::new(expand_node),
+        });
+
+        // LanceTakeNode: materialize all target columns except the id field.
+        let id_lower = tgt_node.id_field.to_lowercase();
+        let take_cols: Vec<String> = tgt_arrow
+            .fields()
+            .iter()
+            .map(|f| f.name().to_lowercase())
+            .filter(|n| n != &id_lower)
+            .collect();
+
+        let mut take_fields: Vec<Field> = expand_arrow_fields(&expand_plan);
+        for raw in &take_cols {
+            let f = tgt_arrow
+                .field_with_name(raw)
+                .map_err(|e| self.plan_err("target field", e))?;
+            take_fields.push(Field::new(
+                qualify_column(target_variable, raw),
+                f.data_type().clone(),
+                true,
+            ));
+        }
+        let take_arrow = Schema::new(take_fields);
+        let take_schema =
+            Arc::new(DFSchema::try_from(take_arrow).map_err(|e| self.plan_err("take schema", e))?);
+
+        let take_node = LanceTakeNode {
+            input: expand_plan,
+            target_table: target_label.to_lowercase(),
+            row_id_column: neighbor_column,
+            take_cols,
+            schema: take_schema,
+        };
+        Ok(LogicalPlan::Extension(Extension {
+            node: Arc::new(take_node),
+        }))
+    }
+
+    fn build_project_on(
+        &self,
+        input: LogicalPlan,
+        projections: &[ProjectionItem],
+    ) -> Result<LogicalPlan> {
+        // Delegate to DataFusionPlanner's project-on-plan helpers (handle
+        // aggregates + Cypher dot-notation aliasing identically to the join path).
+        let has_agg = projections.iter().any(|p| {
+            crate::datafusion_planner::expression::contains_aggregate(&p.expression)
+        });
+        if has_agg {
+            self.df.build_project_with_aggregates(input, projections)
+        } else {
+            self.df.build_simple_project(input, projections)
+        }
+    }
+
+    fn build_sort_on(&self, input: LogicalPlan, sort_items: &[SortItem]) -> Result<LogicalPlan> {
+        use datafusion::logical_expr::SortExpr;
+        let sort_exprs: Vec<SortExpr> = sort_items
+            .iter()
+            .map(|item| {
+                let expr = to_df_value_expr(&item.expression);
+                let asc = matches!(item.direction, crate::ast::SortDirection::Ascending);
+                SortExpr {
+                    expr,
+                    asc,
+                    nulls_first: true,
+                }
+            })
+            .collect();
+        LogicalPlanBuilder::from(input)
+            .sort(sort_exprs)
+            .map_err(|e| self.plan_err("sort", e))?
+            .build()
+            .map_err(|e| self.plan_err("sort build", e))
+    }
+
+    fn plan_err<E: std::fmt::Display>(&self, what: &str, e: E) -> GraphError {
+        GraphError::PlanError {
+            message: format!("native {}: {}", what, e),
+            location: snafu::Location::new(file!(), line!(), column!()),
+        }
+    }
+}
+
+/// Extract the arrow fields of a `CsrExpandNode` extension plan as owned `Field`s.
+fn expand_arrow_fields(plan: &LogicalPlan) -> Vec<Field> {
+    plan.schema()
+        .inner()
+        .fields()
+        .iter()
+        .map(|f| f.as_ref().clone())
+        .collect()
+}
+
+/// True iff the plan is a single-hop expand the native planner can serve.
+fn can_plan_natively(op: &LogicalOperator) -> bool {
+    let mut expands = 0usize;
+    if !walk_supported(op, &mut expands) {
+        return false;
+    }
+    expands == 1
+}
+
+fn walk_supported(op: &LogicalOperator, expands: &mut usize) -> bool {
+    match op {
+        LogicalOperator::ScanByLabel { .. } => true,
+        LogicalOperator::Filter { input, .. }
+        | LogicalOperator::Project { input, .. }
+        | LogicalOperator::Sort { input, .. }
+        | LogicalOperator::Limit { input, .. }
+        | LogicalOperator::Offset { input, .. }
+        | LogicalOperator::Distinct { input } => walk_supported(input, expands),
+        LogicalOperator::Expand {
+            input,
+            relationship_types,
+            direction,
+            ..
+        } => {
+            *expands += 1;
+            if relationship_types.len() != 1 {
+                return false;
+            }
+            if matches!(direction, RelationshipDirection::Undirected) {
+                return false;
+            }
+            walk_supported(input, expands)
+        }
+        // Not supported natively in Phase 2.
+        LogicalOperator::VariableLengthExpand { .. }
+        | LogicalOperator::Join { .. }
+        | LogicalOperator::Unwind { .. } => false,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::datafusion_planner::test_fixtures::{make_catalog, person_knows_config, person_scan};
+    use crate::logical_plan::{LogicalOperator, ProjectionItem};
+    use crate::ast::{PropertyRef, ValueExpression};
+
+    fn knows_expand(direction: RelationshipDirection) -> LogicalOperator {
+        LogicalOperator::Expand {
+            input: Box::new(person_scan("a")),
+            source_variable: "a".to_string(),
+            target_variable: "b".to_string(),
+            target_label: "Person".to_string(),
+            relationship_types: vec!["KNOWS".to_string()],
+            direction,
+            relationship_variable: None,
+            properties: Default::default(),
+            target_properties: Default::default(),
+        }
+    }
+
+    #[test]
+    fn test_can_plan_natively_single_hop() {
+        let plan = LogicalOperator::Project {
+            input: Box::new(knows_expand(RelationshipDirection::Outgoing)),
+            projections: vec![ProjectionItem {
+                expression: ValueExpression::Property(PropertyRef::new("b", "name")),
+                alias: None,
+            }],
+        };
+        assert!(can_plan_natively(&plan));
+    }
+
+    #[test]
+    fn test_cannot_plan_undirected_or_multitype() {
+        assert!(!can_plan_natively(&knows_expand(RelationshipDirection::Undirected)));
+        let mut multi = knows_expand(RelationshipDirection::Outgoing);
+        if let LogicalOperator::Expand {
+            relationship_types, ..
+        } = &mut multi
+        {
+            relationship_types.push("LIKES".to_string());
+        }
+        assert!(!can_plan_natively(&multi));
+    }
+
+    #[test]
+    fn test_cannot_plan_zero_expands() {
+        assert!(!can_plan_natively(&person_scan("a")));
+    }
+
+    #[test]
+    fn test_native_plan_contains_extension_nodes() {
+        let plan = LogicalOperator::Project {
+            input: Box::new(knows_expand(RelationshipDirection::Outgoing)),
+            projections: vec![ProjectionItem {
+                expression: ValueExpression::Property(PropertyRef::new("b", "name")),
+                alias: None,
+            }],
+        };
+        let planner = LanceNativePlanner::with_catalog(person_knows_config(), make_catalog());
+        let df_plan = planner.plan(&plan).unwrap();
+        let s = format!("{:?}", df_plan);
+        assert!(s.contains("CsrExpand"), "missing CsrExpand: {}", s);
+        assert!(s.contains("LanceTake"), "missing LanceTake: {}", s);
+    }
+
+    #[test]
+    fn test_unsupported_falls_back_to_join() {
+        // Variable-length expand must fall back to the DataFusion join path.
+        let vlexpand = LogicalOperator::VariableLengthExpand {
+            input: Box::new(person_scan("a")),
+            source_variable: "a".into(),
+            target_variable: "b".into(),
+            relationship_types: vec!["KNOWS".into()],
+            direction: RelationshipDirection::Outgoing,
+            relationship_variable: None,
+            min_length: Some(1),
+            max_length: Some(2),
+            target_properties: Default::default(),
+        };
+        let planner = LanceNativePlanner::with_catalog(person_knows_config(), make_catalog());
+        let df_plan = planner.plan(&vlexpand).unwrap();
+        let s = format!("{:?}", df_plan);
+        assert!(!s.contains("CsrExpand"), "should not be native: {}", s);
+    }
+}
+```
+
+This task also requires one `pub(crate)` accessor on `DataFusionPlanner`. Add it in `crates/lance-graph/src/datafusion_planner/mod.rs` inside `impl DataFusionPlanner`:
+
+```rust
+    /// Access the catalog, if any (used by the native planner).
+    pub(crate) fn catalog_ref(
+        &self,
+    ) -> Option<&Arc<dyn GraphSourceCatalog>> {
+        self.catalog.as_ref()
+    }
+```
+
+(`build_simple_project` and `build_scan` are already `pub(crate)`; `build_project_with_aggregates` should be as well, but verify it in Step 3. `contains_aggregate` lives in `expression`, now `pub(crate)`.)
+
+- [ ] **Step 3: Run the planner tests and verify they pass**
+
+Run: `cargo test -p lance-graph -- lance_native_planner::tests`
+Expected: compiles and PASSES (5 tests). If `build_project_with_aggregates` is not `pub(crate)`, change its visibility in `crates/lance-graph/src/datafusion_planner/builder/aggregate_ops.rs` from `fn` to `pub(crate) fn` (verify; `build_simple_project` is already `pub(crate)`).
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add crates/lance-graph/src/lance_native_planner/ crates/lance-graph/src/datafusion_planner/
+git commit -m "feat(native): LanceNativePlanner single-hop lowering with DataFusion fallback"
+```
+
+---
+
+## Task 7: Wire `ExecutionStrategy::LanceNative` and end-to-end parity tests
+
+**Files:**
+- Modify: `crates/lance-graph/src/query.rs` (the two `LanceNative` arms; native context + planner)
+- Create: `crates/lance-graph/tests/test_lance_native_expand.rs`
+
+- [ ] **Step 1: Write the failing end-to-end test**
+
+Create `crates/lance-graph/tests/test_lance_native_expand.rs`:
+
+```rust
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The Lance Authors
+
+//! End-to-end parity tests: native CSR expand vs DataFusion join path.
+
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use arrow_array::{Int64Array, RecordBatch, StringArray, UInt64Array};
+use arrow_schema::{DataType, Field, Schema};
+use lance_graph::config::GraphConfig;
+use lance_graph::query::{CypherQuery, ExecutionStrategy};
+
+fn person_batch() -> RecordBatch {
+    // Dense ids 0..4 (row id == id_field value).
+    let schema = Arc::new(Schema::new(vec![
+        Field::new("id", DataType::UInt64, false),
+        Field::new("name", DataType::Utf8, false),
+        Field::new("age", DataType::Int64, false),
+    ]));
+    RecordBatch::try_new(
+        schema,
+        vec![
+            Arc::new(UInt64Array::from(vec![0u64, 1, 2, 3])),
+            Arc::new(StringArray::from(vec!["alice", "bob", "carol", "dave"])),
+            Arc::new(Int64Array::from(vec![30i64, 40, 25, 50])),
+        ],
+    )
+    .unwrap()
+}
+
+fn knows_batch() -> RecordBatch {
+    // 0->1, 0->2, 1->3, 2->3
+    let schema = Arc::new(Schema::new(vec![
+        Field::new("src_id", DataType::UInt64, false),
+        Field::new("dst_id", DataType::UInt64, false),
+    ]));
+    RecordBatch::try_new(
+        schema,
+        vec![
+            Arc::new(UInt64Array::from(vec![0u64, 0, 1, 2])),
+            Arc::new(UInt64Array::from(vec![1u64, 2, 3, 3])),
+        ],
+    )
+    .unwrap()
+}
+
+fn config() -> GraphConfig {
+    GraphConfig::builder()
+        .with_node_label("Person", "id")
+        .with_relationship("KNOWS", "src_id", "dst_id")
+        .build()
+        .unwrap()
+}
+
+fn datasets() -> HashMap<String, RecordBatch> {
+    let mut d = HashMap::new();
+    d.insert("Person".to_string(), person_batch());
+    d.insert("KNOWS".to_string(), knows_batch());
+    d
+}
+
+/// Collect (a.name, b.name) rows as a sorted Vec for order-independent compare.
+fn name_pairs(batch: &RecordBatch) -> Vec<(String, String)> {
+    let cols: Vec<&StringArray> = (0..batch.num_columns())
+        .map(|i| batch.column(i).as_any().downcast_ref::<StringArray>().unwrap())
+        .collect();
+    let mut rows: Vec<(String, String)> = (0..batch.num_rows())
+        .map(|r| (cols[0].value(r).to_string(), cols[1].value(r).to_string()))
+        .collect();
+    rows.sort();
+    rows
+}
+
+#[tokio::test]
+async fn test_native_expand_matches_datafusion_names() {
+    let q = "MATCH (a:Person)-[:KNOWS]->(b:Person) RETURN a.name, b.name";
+    let query = CypherQuery::new(q).unwrap().with_config(config());
+
+    let native = query
+        .execute(datasets(), Some(ExecutionStrategy::LanceNative))
+        .await
+        .unwrap();
+    let df = query
+        .execute(datasets(), Some(ExecutionStrategy::DataFusion))
+        .await
+        .unwrap();
+
+    let expected = vec![
+        ("alice".to_string(), "bob".to_string()),
+        ("alice".to_string(), "carol".to_string()),
+        ("bob".to_string(), "dave".to_string()),
+        ("carol".to_string(), "dave".to_string()),
+    ];
+    assert_eq!(name_pairs(&native), expected);
+    assert_eq!(name_pairs(&native), name_pairs(&df));
+}
+
+#[tokio::test]
+async fn test_native_expand_with_target_filter() {
+    let q = "MATCH (a:Person)-[:KNOWS]->(b:Person) WHERE b.age > 30 RETURN a.name, b.name";
+    let query = CypherQuery::new(q).unwrap().with_config(config());
+
+    let native = query
+        .execute(datasets(), Some(ExecutionStrategy::LanceNative))
+        .await
+        .unwrap();
+    let df = query
+        .execute(datasets(), Some(ExecutionStrategy::DataFusion))
+        .await
+        .unwrap();
+    assert_eq!(name_pairs(&native), name_pairs(&df));
+    // bob(40) and dave(50) qualify as targets: (alice,bob),(bob,dave),(carol,dave)
+    assert_eq!(
+        name_pairs(&native),
+        vec![
+            ("alice".to_string(), "bob".to_string()),
+            ("bob".to_string(), "dave".to_string()),
+            ("carol".to_string(), "dave".to_string()),
+        ]
+    );
+}
+
+#[tokio::test]
+async fn test_native_expand_incoming_matches_datafusion() {
+    let q = "MATCH (a:Person)<-[:KNOWS]-(b:Person) RETURN a.name, b.name";
+    let query = CypherQuery::new(q).unwrap().with_config(config());
+    let native = query
+        .execute(datasets(), Some(ExecutionStrategy::LanceNative))
+        .await
+        .unwrap();
+    let df = query
+        .execute(datasets(), Some(ExecutionStrategy::DataFusion))
+        .await
+        .unwrap();
+    assert_eq!(name_pairs(&native), name_pairs(&df));
+}
+
+#[tokio::test]
+async fn test_native_varlength_falls_back_and_matches() {
+    // Variable-length path is unsupported natively; LanceNative must fall back
+    // and produce the same result as DataFusion.
+    let q = "MATCH (a:Person)-[:KNOWS*1..2]->(b:Person) RETURN a.name, b.name";
+    let query = CypherQuery::new(q).unwrap().with_config(config());
+    let native = query
+        .execute(datasets(), Some(ExecutionStrategy::LanceNative))
+        .await
+        .unwrap();
+    let df = query
+        .execute(datasets(), Some(ExecutionStrategy::DataFusion))
+        .await
+        .unwrap();
+    assert_eq!(name_pairs(&native), name_pairs(&df));
+}
+```
+
+- [ ] **Step 2: Run to verify it fails**
+
+Run: `cargo test -p lance-graph --test test_lance_native_expand`
+Expected: FAIL — `LanceNative` currently returns `UnsupportedFeature`.
+
+- [ ] **Step 3: Wire the native execution path in query.rs**
+
+In `crates/lance-graph/src/query.rs`:
+
+(a) Change the private context builder to optionally install the query planner.
+Replace the signature and `SessionContext::new()` line of
+`build_catalog_and_context_from_datasets` (around lines 608 and 627):
+
+```rust
+    async fn build_catalog_and_context_from_datasets(
+        &self,
+        datasets: HashMap<String, arrow::record_batch::RecordBatch>,
+        native: bool,
+    ) -> Result<(
+        lance_graph_catalog::InMemoryCatalog,
+        datafusion::execution::context::SessionContext,
+    )> {
+        use datafusion::datasource::{DefaultTableSource, MemTable};
+        use datafusion::execution::context::SessionContext;
+        use lance_graph_catalog::InMemoryCatalog;
+        use std::sync::Arc;
+
+        if datasets.is_empty() {
+            return Err(GraphError::ConfigError {
+                message: "No input datasets provided".to_string(),
+                location: snafu::Location::new(file!(), line!(), column!()),
+            });
+        }
+
+        // Create session context (with the CSR query planner when native).
+        let ctx = if native {
+            use datafusion::execution::session_state::SessionStateBuilder;
+            let state = SessionStateBuilder::new()
+                .with_default_features()
+                .with_query_planner(Arc::new(crate::lance_native_planner::CsrQueryPlanner::new()))
+                .build();
+            SessionContext::new_with_state(state)
+        } else {
+            SessionContext::new()
+        };
+        let mut catalog = InMemoryCatalog::new();
+```
+
+(Leave the rest of the method body — the dataset registration loop and the
+`Ok((catalog, ctx))` return — unchanged.)
+
+(b) Update the three existing callers to pass `false`:
+- `execute_datafusion` (around line 599): `self.build_catalog_and_context_from_datasets(datasets, false)`
+- `explain` (around line 329): `self.build_catalog_and_context_from_datasets(datasets, false)`
+- `to_sql` (around line 369): `self.build_catalog_and_context_from_datasets(datasets, false)`
+
+(c) Add the `create_logical_plans_native` helper next to `create_logical_plans`
+(after line 841):
+
+```rust
+    fn create_logical_plans_native(
+        &self,
+        catalog: std::sync::Arc<dyn lance_graph_catalog::GraphSourceCatalog>,
+    ) -> Result<datafusion::logical_expr::LogicalPlan> {
+        use crate::datafusion_planner::GraphPhysicalPlanner;
+        use crate::lance_native_planner::LanceNativePlanner;
+        use crate::semantic::SemanticAnalyzer;
+
+        let config = self.require_config()?;
+
+        let mut analyzer = SemanticAnalyzer::new(config.clone());
+        let semantic = analyzer.analyze(&self.ast, &self.parameters)?;
+        if !semantic.errors.is_empty() {
+            return Err(GraphError::PlanError {
+                message: format!("Semantic analysis failed:\n{}", semantic.errors.join("\n")),
+                location: snafu::Location::new(file!(), line!(), column!()),
+            });
+        }
+
+        let mut logical_planner = crate::logical_plan::LogicalPlanner::new(config);
+        let logical_plan = logical_planner.plan(&semantic.ast)?;
+
+        let native = LanceNativePlanner::with_catalog(config.clone(), catalog);
+        native.plan(&logical_plan)
+    }
+```
+
+No extra re-export is needed for `LanceNativePlanner`: in
+`crates/lance-graph/src/lance_native_planner/mod.rs` it is already a `pub struct`,
+and `crate::lance_native_planner::LanceNativePlanner` is reachable because
+`lib.rs` declares `pub mod lance_native_planner;`.
+
+(d) Add the native execute method (after `execute_datafusion`, ~line 604):
+
+```rust
+    async fn execute_lance_native(
+        &self,
+        datasets: HashMap<String, arrow::record_batch::RecordBatch>,
+    ) -> Result<arrow::record_batch::RecordBatch> {
+        use arrow::compute::concat_batches;
+        use std::sync::Arc;
+
+        let (catalog, ctx) = self
+            .build_catalog_and_context_from_datasets(datasets, true)
+            .await?;
+
+        let df_logical_plan = self.create_logical_plans_native(Arc::new(catalog))?;
+
+        let df = ctx
+            .execute_logical_plan(df_logical_plan)
+            .await
+            .map_err(|e| GraphError::ExecutionError {
+                message: format!("Failed to execute native plan: {}", e),
+                location: snafu::Location::new(file!(), line!(), column!()),
+            })?;
+        let result_schema = df.schema().inner().clone();
+        let batches = df.collect().await.map_err(|e| GraphError::ExecutionError {
+            message: format!("Failed to collect native results: {}", e),
+            location: snafu::Location::new(file!(), line!(), column!()),
+        })?;
+        concat_batches(&result_schema, &batches).map_err(|e| GraphError::ExecutionError {
+            message: format!("Failed to concat native results: {}", e),
+            location: snafu::Location::new(file!(), line!(), column!()),
+        })
+    }
+```
+
+(e) Route the in-memory `execute` arm (around line 234-240): replace the
+`ExecutionStrategy::LanceNative => Err(...)` arm with:
+
+```rust
+            ExecutionStrategy::LanceNative => self.execute_lance_native(datasets).await,
+```
+
+(Leave the `execute_with_namespace_internal` `LanceNative` arm returning
+`UnsupportedFeature` — namespace native execution is Phase 4.)
+
+- [ ] **Step 4: Run the end-to-end tests**
+
+Run: `cargo test -p lance-graph --test test_lance_native_expand`
+Expected: PASS (4 tests).
+
+- [ ] **Step 5: Run the full crate test suite**
+
+Run: `cargo test -p lance-graph`
+Expected: PASS (all prior tests + new ones). Then `cargo clippy -p lance-graph --all-targets` — fix any warnings (e.g. unused imports).
+
+- [ ] **Step 6: Commit**
+
+```bash
+git add crates/lance-graph/src/query.rs crates/lance-graph/tests/test_lance_native_expand.rs
+git commit -m "feat(native): wire LanceNative execution strategy + e2e parity tests"
+```
+
+---
+
+## Self-Review
+
+**Spec coverage:**
+- Custom DataFusion `ExecutionPlan` execution model → Tasks 3, 4, 5 (nodes, execs, planner). ✓
+- Dense-ROWID id model (vertex id == row id; source `a__<id_field>`, neighbor `b__<id_field>`) → Task 6 `build_expand_native`. ✓
+- Output via `take()`, materialize all target columns except id → Task 4 + Task 6 `take_cols`. ✓
+- Separate `LanceTakeExec` + `RowMaterializer` (arrow take now, Lance later) → Task 4. ✓
+- Planner reuse + override only `Expand`; fallback for var-length/multi-hop/multi-type/undirected/Join/Unwind → Task 6 `can_plan_natively`/`build_native`. ✓
+- CSR built at physical-planning time with real `rel_map` columns (reversed for incoming) → Task 1 + Task 5. ✓
+- query.rs wiring: in-memory `execute` native path wired; namespace native stays `UnsupportedFeature` → Task 7. ✓
+- Tests: operator units (Tasks 3,4), planner native/fallback (Task 6), e2e parity incl. filter, incoming, fallback (Task 7). ✓
+
+**Placeholder scan:** No TBD/TODO; all steps contain concrete code and commands.
+
+**Type consistency:** `expand_batch`/`take_batch` signatures match their `execute` call sites; `CsrExpandNode`/`LanceTakeNode` field names match construction in Task 6 and downcast use in Task 5; `add_edges_from_batch_with_columns` signature matches Task 5 usage; `CsrQueryPlanner::new` / `with_catalog` / `build_simple_project` / `build_project_with_aggregates` / `catalog_ref` names consistent across tasks.
+
+**Risk notes for the implementer:**
+- DataFusion 50.3 API drift: if `PlanProperties::new`, `EmissionType`/`Boundedness` import paths, or `ExtensionPlanner`/`QueryPlanner` async signatures differ, consult `cargo doc -p datafusion --open` for the exact paths; the field/method *names* used here are stable across 49–50.
+- If `build_project_with_aggregates` is private, widen it to `pub(crate)` (Task 6, Step 3).
+- `with_default_features()` on `SessionStateBuilder` is required so standard scalar/aggregate functions resolve in the native context.

From 3df32d7dd0e8afcb737dd514218f0605069928a5 Mon Sep 17 00:00:00 2001
From: "jianjian.xie" <jja725@gmail.com>
Date: Mon, 22 Jun 2026 15:17:26 -0700
Subject: [PATCH 03/10] feat(csr): add_edges_from_batch_with_columns for custom
 edge column names

---
 crates/lance-graph/src/csr_index.rs | 75 +++++++++++++++++++++++------
 1 file changed, 59 insertions(+), 16 deletions(-)

diff --git a/crates/lance-graph/src/csr_index.rs b/crates/lance-graph/src/csr_index.rs
index 176ef54d..1d3d7200 100644
--- a/crates/lance-graph/src/csr_index.rs
+++ b/crates/lance-graph/src/csr_index.rs
@@ -237,32 +237,43 @@ impl CsrIndexBuilder {
     }
 
     /// Add edges from an Arrow RecordBatch with `src_id` and `dst_id` columns.
-    pub fn add_edges_from_batch(mut self, batch: &RecordBatch) -> Result<Self> {
-        let src_col = batch
-            .column_by_name("src_id")
-            .ok_or_else(|| GraphError::PlanError {
-                message: "Edge batch missing 'src_id' column".to_string(),
-                location: snafu::Location::new(file!(), line!(), column!()),
-            })?;
-        let dst_col = batch
-            .column_by_name("dst_id")
+    pub fn add_edges_from_batch(self, batch: &RecordBatch) -> Result<Self> {
+        self.add_edges_from_batch_with_columns(batch, "src_id", "dst_id")
+    }
+
+    /// Add edges from an Arrow RecordBatch, reading source vertex ids from
+    /// `src_col` and destination vertex ids from `dst_col`.
+    ///
+    /// Both columns must be `UInt64`. To build a reversed (incoming/CSC) index,
+    /// pass the destination column name as `src_col` and vice versa.
+    pub fn add_edges_from_batch_with_columns(
+        mut self,
+        batch: &RecordBatch,
+        src_col: &str,
+        dst_col: &str,
+    ) -> Result<Self> {
+        let src_array = batch
+            .column_by_name(src_col)
             .ok_or_else(|| GraphError::PlanError {
-                message: "Edge batch missing 'dst_id' column".to_string(),
+                message: format!("Edge batch missing '{}' column", src_col),
                 location: snafu::Location::new(file!(), line!(), column!()),
-            })?;
-
-        let src_array = src_col
+            })?
             .as_any()
             .downcast_ref::<UInt64Array>()
             .ok_or_else(|| GraphError::PlanError {
-                message: "src_id column must be UInt64".to_string(),
+                message: format!("'{}' column must be UInt64", src_col),
                 location: snafu::Location::new(file!(), line!(), column!()),
             })?;
-        let dst_array = dst_col
+        let dst_array = batch
+            .column_by_name(dst_col)
+            .ok_or_else(|| GraphError::PlanError {
+                message: format!("Edge batch missing '{}' column", dst_col),
+                location: snafu::Location::new(file!(), line!(), column!()),
+            })?
             .as_any()
             .downcast_ref::<UInt64Array>()
             .ok_or_else(|| GraphError::PlanError {
-                message: "dst_id column must be UInt64".to_string(),
+                message: format!("'{}' column must be UInt64", dst_col),
                 location: snafu::Location::new(file!(), line!(), column!()),
             })?;
 
@@ -651,4 +662,36 @@ mod tests {
         assert_eq!(idx.neighbors(0), &[1, 1, 1]);
         assert_eq!(idx.degree(0), 3);
     }
+
+    #[test]
+    fn test_build_from_record_batch_custom_columns() {
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("src_person_id", DataType::UInt64, false),
+            Field::new("dst_person_id", DataType::UInt64, false),
+        ]));
+        let batch = RecordBatch::try_new(
+            schema,
+            vec![
+                Arc::new(UInt64Array::from(vec![0, 0, 1])),
+                Arc::new(UInt64Array::from(vec![1, 2, 2])),
+            ],
+        )
+        .unwrap();
+
+        // Forward (outgoing): src -> dst
+        let idx = CsrIndexBuilder::new()
+            .add_edges_from_batch_with_columns(&batch, "src_person_id", "dst_person_id")
+            .unwrap()
+            .build();
+        assert_eq!(idx.neighbors(0), &[1, 2]);
+        assert_eq!(idx.neighbors(1), &[2]);
+
+        // Reversed (incoming): swap the column args -> dst -> src
+        let rev = CsrIndexBuilder::new()
+            .add_edges_from_batch_with_columns(&batch, "dst_person_id", "src_person_id")
+            .unwrap()
+            .build();
+        assert_eq!(rev.neighbors(2), &[0, 1]);
+        assert_eq!(rev.neighbors(1), &[0]);
+    }
 }

From 0c434085ba507a6cb83d9c3b863550c6515d2b53 Mon Sep 17 00:00:00 2001
From: "jianjian.xie" <jja725@gmail.com>
Date: Mon, 22 Jun 2026 15:24:53 -0700
Subject: [PATCH 04/10] refactor(native): promote lance_native_planner to
 module dir; add NativeDirection

---
 .../lance-graph/src/lance_native_planner.rs   | 77 -------------------
 .../src/lance_native_planner/direction.rs     | 14 ++++
 .../src/lance_native_planner/mod.rs           | 42 ++++++++++
 3 files changed, 56 insertions(+), 77 deletions(-)
 delete mode 100644 crates/lance-graph/src/lance_native_planner.rs
 create mode 100644 crates/lance-graph/src/lance_native_planner/direction.rs
 create mode 100644 crates/lance-graph/src/lance_native_planner/mod.rs

diff --git a/crates/lance-graph/src/lance_native_planner.rs b/crates/lance-graph/src/lance_native_planner.rs
deleted file mode 100644
index a500d6a9..00000000
--- a/crates/lance-graph/src/lance_native_planner.rs
+++ /dev/null
@@ -1,77 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The Lance Authors
-
-//! Lance Native physical planner (placeholder)
-//!
-//! This planner is intended to compile logical graph plans into a physical
-//! execution plan that leverages Lance's native scan and filter engine.
-//!
-//! For now, this is a placeholder implementation that conforms to the
-//! `GraphPhysicalPlanner` trait and returns an empty DataFusion logical plan
-//! until the native pipeline is wired up.
-
-use crate::config::GraphConfig;
-use crate::datafusion_planner::GraphPhysicalPlanner;
-use crate::error::Result;
-use crate::logical_plan::LogicalOperator;
-use datafusion::common::DFSchema;
-use datafusion::logical_expr::{EmptyRelation, LogicalPlan};
-use std::sync::Arc;
-
-/// Placeholder Lance-native planner
-pub struct LanceNativePlanner {
-    #[allow(dead_code)]
-    config: GraphConfig,
-}
-
-impl LanceNativePlanner {
-    pub fn new(config: GraphConfig) -> Self {
-        Self { config }
-    }
-}
-
-impl GraphPhysicalPlanner for LanceNativePlanner {
-    fn plan(&self, _logical_plan: &LogicalOperator) -> Result<LogicalPlan> {
-        // Placeholder: return an empty relation. A future implementation will
-        // produce a runnable pipeline using Lance's native execution engine.
-        let schema = Arc::new(DFSchema::empty());
-        Ok(LogicalPlan::EmptyRelation(EmptyRelation {
-            produce_one_row: false,
-            schema,
-        }))
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_lance_native_planner_placeholder() {
-        let cfg = GraphConfig::builder()
-            .with_node_label("Person", "id")
-            .build()
-            .unwrap();
-        let planner = LanceNativePlanner::new(cfg);
-        // Minimal logical plan to feed into placeholder
-        let lp = LogicalOperator::Distinct {
-            input: Box::new(LogicalOperator::Limit {
-                input: Box::new(LogicalOperator::Project {
-                    input: Box::new(LogicalOperator::ScanByLabel {
-                        variable: "n".to_string(),
-                        label: "Person".to_string(),
-                        properties: Default::default(),
-                    }),
-                    projections: vec![],
-                }),
-                count: 1,
-            }),
-        };
-        let df_plan = planner.plan(&lp).unwrap();
-        // Empty relation is acceptable as a placeholder
-        match df_plan {
-            LogicalPlan::EmptyRelation(_) => {}
-            _ => panic!("expected empty relation placeholder"),
-        }
-    }
-}
diff --git a/crates/lance-graph/src/lance_native_planner/direction.rs b/crates/lance-graph/src/lance_native_planner/direction.rs
new file mode 100644
index 00000000..5bc9c105
--- /dev/null
+++ b/crates/lance-graph/src/lance_native_planner/direction.rs
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The Lance Authors
+
+//! Traversal direction for the native CSR expand operators.
+
+/// Direction a single-hop expand traverses. `Undirected` is intentionally
+/// absent — undirected expands fall back to the DataFusion join planner.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum NativeDirection {
+    /// Follow edges source -> destination (CSR).
+    Outgoing,
+    /// Follow edges destination -> source (CSC / reversed).
+    Incoming,
+}
diff --git a/crates/lance-graph/src/lance_native_planner/mod.rs b/crates/lance-graph/src/lance_native_planner/mod.rs
new file mode 100644
index 00000000..1a12aed1
--- /dev/null
+++ b/crates/lance-graph/src/lance_native_planner/mod.rs
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The Lance Authors
+
+//! Lance Native physical planner (placeholder)
+//!
+//! Rewritten in a later task to lower single-hop `Expand` onto CSR-backed
+//! extension nodes. For now it keeps the original placeholder behavior so the
+//! crate compiles between tasks.
+
+mod direction;
+
+pub use direction::NativeDirection;
+
+use crate::config::GraphConfig;
+use crate::datafusion_planner::GraphPhysicalPlanner;
+use crate::error::Result;
+use crate::logical_plan::LogicalOperator;
+use datafusion::common::DFSchema;
+use datafusion::logical_expr::{EmptyRelation, LogicalPlan};
+use std::sync::Arc;
+
+/// Placeholder Lance-native planner
+pub struct LanceNativePlanner {
+    #[allow(dead_code)]
+    config: GraphConfig,
+}
+
+impl LanceNativePlanner {
+    pub fn new(config: GraphConfig) -> Self {
+        Self { config }
+    }
+}
+
+impl GraphPhysicalPlanner for LanceNativePlanner {
+    fn plan(&self, _logical_plan: &LogicalOperator) -> Result<LogicalPlan> {
+        let schema = Arc::new(DFSchema::empty());
+        Ok(LogicalPlan::EmptyRelation(EmptyRelation {
+            produce_one_row: false,
+            schema,
+        }))
+    }
+}

From 5fc76648a6499d0b6a00076093927cbe30b85d0d Mon Sep 17 00:00:00 2001
From: "jianjian.xie" <jja725@gmail.com>
Date: Mon, 22 Jun 2026 15:34:57 -0700
Subject: [PATCH 05/10] feat(native): CsrExpandNode/Exec and expand_batch core

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
---
 .../src/lance_native_planner/csr_expand.rs    | 351 ++++++++++++++++++
 .../src/lance_native_planner/mod.rs           |   1 +
 2 files changed, 352 insertions(+)
 create mode 100644 crates/lance-graph/src/lance_native_planner/csr_expand.rs

diff --git a/crates/lance-graph/src/lance_native_planner/csr_expand.rs b/crates/lance-graph/src/lance_native_planner/csr_expand.rs
new file mode 100644
index 00000000..7f83da7f
--- /dev/null
+++ b/crates/lance-graph/src/lance_native_planner/csr_expand.rs
@@ -0,0 +1,351 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The Lance Authors
+
+//! Native single-hop expand: logical node + physical operator + core function.
+//!
+//! `CsrExpandExec` does topology only — for each input row it looks up the
+//! source vertex's neighbors in the CSR index and emits one output row per
+//! neighbor, carrying through all input columns and appending the neighbor row
+//! id as a new column. Target property materialization is handled separately by
+//! `LanceTakeExec`.
+
+use std::any::Any;
+use std::fmt;
+use std::sync::Arc;
+
+use arrow::compute::{cast, take};
+use arrow_array::{Array, ArrayRef, RecordBatch, UInt32Array, UInt64Array};
+use arrow_schema::{DataType, Field, SchemaRef};
+use datafusion::common::{DFSchemaRef, Result as DFResult};
+use datafusion::execution::TaskContext;
+use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
+use datafusion::physical_expr::EquivalenceProperties;
+use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType};
+use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
+use datafusion::physical_plan::{
+    DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties,
+    SendableRecordBatchStream,
+};
+use futures::StreamExt;
+
+use super::direction::NativeDirection;
+use crate::csr_index::CsrIndex;
+use crate::error::{GraphError, Result};
+
+/// Expand one input batch: for every input row, append one output row per
+/// neighbor of that row's source vertex.
+///
+/// `source_id_idx` is the column index of the source vertex id within `input`.
+/// `neighbor_field` is the appended column (its data type is the target id
+/// field's type; neighbor ids are cast into it). `out_schema` must equal
+/// `input.schema()` fields followed by `neighbor_field`.
+pub(crate) fn expand_batch(
+    input: &RecordBatch,
+    source_id_idx: usize,
+    csr: &CsrIndex,
+    neighbor_field: &Field,
+    out_schema: &SchemaRef,
+) -> Result<RecordBatch> {
+    let map_err = |e: arrow_schema::ArrowError, what: &str| GraphError::ExecutionError {
+        message: format!("CsrExpand {}: {}", what, e),
+        location: snafu::Location::new(file!(), line!(), column!()),
+    };
+
+    // Source ids may be any integer type; normalize to u64.
+    let src_u64 = cast(input.column(source_id_idx), &DataType::UInt64)
+        .map_err(|e| map_err(e, "cast source id to u64"))?;
+    // Arrow 56.x: cast(_, UInt64) always yields a plain UInt64Array.
+    let src = src_u64
+        .as_any()
+        .downcast_ref::<UInt64Array>()
+        .expect("cast to UInt64 yields UInt64Array");
+
+    let mut parent_idx: Vec<u32> = Vec::with_capacity(input.num_rows());
+    let mut neighbors: Vec<u64> = Vec::with_capacity(input.num_rows());
+    for row in 0..input.num_rows() {
+        if src.is_null(row) {
+            continue;
+        }
+        let row_u32 = u32::try_from(row).map_err(|_| GraphError::ExecutionError {
+            message: "CsrExpand: input batch row index exceeds u32::MAX".to_string(),
+            location: snafu::Location::new(file!(), line!(), column!()),
+        })?;
+        for &n in csr.neighbors(src.value(row)) {
+            parent_idx.push(row_u32);
+            neighbors.push(n);
+        }
+    }
+
+    let take_idx = UInt32Array::from(parent_idx);
+    let mut cols: Vec<ArrayRef> = Vec::with_capacity(input.num_columns() + 1);
+    for c in input.columns() {
+        cols.push(take(c, &take_idx, None).map_err(|e| map_err(e, "take carried column"))?);
+    }
+    let neigh_u64 = Arc::new(UInt64Array::from(neighbors)) as ArrayRef;
+    let neigh_col = cast(&neigh_u64, neighbor_field.data_type())
+        .map_err(|e| map_err(e, "cast neighbor id"))?;
+    cols.push(neigh_col);
+
+    RecordBatch::try_new(out_schema.clone(), cols).map_err(|e| GraphError::ExecutionError {
+        message: format!("CsrExpand build output batch: {}", e),
+        location: snafu::Location::new(file!(), line!(), column!()),
+    })
+}
+
+/// Logical extension node for a single-hop CSR expand.
+///
+/// Holds only hashable metadata; the physical operator (and its `CsrIndex`) is
+/// constructed by the extension planner at physical-planning time.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct CsrExpandNode {
+    /// Source subplan (a node scan, optionally with a source-only filter).
+    pub input: LogicalPlan,
+    /// Relationship type (lowercased table name to look up the edge table).
+    pub rel_type: String,
+    /// Edge table column holding source vertex ids.
+    pub src_field: String,
+    /// Edge table column holding destination vertex ids.
+    pub dst_field: String,
+    /// Traversal direction.
+    pub direction: NativeDirection,
+    /// Qualified column in `input` carrying the source vertex id (e.g. `a__id`).
+    pub source_id_column: String,
+    /// Qualified output column for the neighbor row id (e.g. `b__id`).
+    pub neighbor_column: String,
+    /// Arrow data type of the neighbor column (target id field's type).
+    pub neighbor_data_type: DataType,
+    /// Output schema = input schema + neighbor column.
+    pub schema: DFSchemaRef,
+}
+
+impl PartialOrd for CsrExpandNode {
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        (
+            &self.rel_type,
+            &self.src_field,
+            &self.dst_field,
+            &self.source_id_column,
+            &self.neighbor_column,
+        )
+            .partial_cmp(&(
+                &other.rel_type,
+                &other.src_field,
+                &other.dst_field,
+                &other.source_id_column,
+                &other.neighbor_column,
+            ))
+    }
+}
+
+impl UserDefinedLogicalNodeCore for CsrExpandNode {
+    fn name(&self) -> &str {
+        "CsrExpand"
+    }
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.input]
+    }
+    fn schema(&self) -> &DFSchemaRef {
+        &self.schema
+    }
+    fn expressions(&self) -> Vec<Expr> {
+        vec![]
+    }
+    fn fmt_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(
+            f,
+            "CsrExpand: rel={}, dir={:?}, src={}, neighbor={}",
+            self.rel_type, self.direction, self.source_id_column, self.neighbor_column
+        )
+    }
+    fn with_exprs_and_inputs(
+        &self,
+        _exprs: Vec<Expr>,
+        mut inputs: Vec<LogicalPlan>,
+    ) -> DFResult<Self> {
+        Ok(Self {
+            input: inputs.remove(0),
+            ..self.clone()
+        })
+    }
+}
+
+/// Physical operator for `CsrExpandNode`; each input partition maps 1:1 to an output one.
+#[derive(Debug)]
+pub struct CsrExpandExec {
+    input: Arc<dyn ExecutionPlan>,
+    csr: Arc<CsrIndex>,
+    source_id_idx: usize,
+    neighbor_field: Field,
+    out_schema: SchemaRef,
+    props: PlanProperties,
+}
+
+impl CsrExpandExec {
+    pub fn new(
+        input: Arc<dyn ExecutionPlan>,
+        csr: Arc<CsrIndex>,
+        source_id_idx: usize,
+        neighbor_field: Field,
+        out_schema: SchemaRef,
+    ) -> Self {
+        let props = PlanProperties::new(
+            EquivalenceProperties::new(out_schema.clone()),
+            Partitioning::UnknownPartitioning(input.properties().partitioning.partition_count()),
+            EmissionType::Incremental,
+            Boundedness::Bounded,
+        );
+        Self {
+            input,
+            csr,
+            source_id_idx,
+            neighbor_field,
+            out_schema,
+            props,
+        }
+    }
+}
+
+impl DisplayAs for CsrExpandExec {
+    fn fmt_as(&self, _t: DisplayFormatType, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "CsrExpandExec: neighbor={}", self.neighbor_field.name())
+    }
+}
+
+impl ExecutionPlan for CsrExpandExec {
+    fn name(&self) -> &str {
+        "CsrExpandExec"
+    }
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+    fn properties(&self) -> &PlanProperties {
+        &self.props
+    }
+    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
+        vec![&self.input]
+    }
+    fn with_new_children(
+        self: Arc<Self>,
+        children: Vec<Arc<dyn ExecutionPlan>>,
+    ) -> DFResult<Arc<dyn ExecutionPlan>> {
+        Ok(Arc::new(CsrExpandExec::new(
+            children[0].clone(),
+            self.csr.clone(),
+            self.source_id_idx,
+            self.neighbor_field.clone(),
+            self.out_schema.clone(),
+        )))
+    }
+    fn execute(
+        &self,
+        partition: usize,
+        context: Arc<TaskContext>,
+    ) -> DFResult<SendableRecordBatchStream> {
+        let input = self.input.execute(partition, context)?;
+        let csr = self.csr.clone();
+        let idx = self.source_id_idx;
+        let field = self.neighbor_field.clone();
+        let out_schema = self.out_schema.clone();
+        let out_schema_for_stream = out_schema.clone();
+        let stream = input.map(move |rb| {
+            let rb = rb?;
+            expand_batch(&rb, idx, &csr, &field, &out_schema)
+                .map_err(|e| datafusion::error::DataFusionError::Execution(e.to_string()))
+        });
+        Ok(Box::pin(RecordBatchStreamAdapter::new(
+            out_schema_for_stream,
+            stream,
+        )))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use arrow_schema::Schema;
+    use crate::csr_index::CsrIndexBuilder;
+
+    fn input_batch() -> RecordBatch {
+        // a__id = [0,1,2,3], a__name = ["n0","n1","n2","n3"]
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("a__id", DataType::UInt64, false),
+            Field::new("a__name", DataType::Utf8, false),
+        ]));
+        RecordBatch::try_new(
+            schema,
+            vec![
+                Arc::new(UInt64Array::from(vec![0u64, 1, 2, 3])),
+                Arc::new(arrow_array::StringArray::from(vec!["n0", "n1", "n2", "n3"])),
+            ],
+        )
+        .unwrap()
+    }
+
+    fn out_schema() -> SchemaRef {
+        Arc::new(Schema::new(vec![
+            Field::new("a__id", DataType::UInt64, false),
+            Field::new("a__name", DataType::Utf8, false),
+            Field::new("b__id", DataType::UInt64, true),
+        ]))
+    }
+
+    #[test]
+    fn test_expand_batch_outgoing() {
+        // 0->1, 0->2, 1->2, 3-> (none)
+        let csr = CsrIndexBuilder::new()
+            .with_num_vertices(4)
+            .add_edge(0, 1)
+            .add_edge(0, 2)
+            .add_edge(1, 2)
+            .build();
+        let neighbor_field = Field::new("b__id", DataType::UInt64, true);
+        let out = expand_batch(&input_batch(), 0, &csr, &neighbor_field, &out_schema()).unwrap();
+
+        assert_eq!(out.num_rows(), 3);
+        let a_id = out.column(0).as_any().downcast_ref::<UInt64Array>().unwrap();
+        let b_id = out.column(2).as_any().downcast_ref::<UInt64Array>().unwrap();
+        let a_name = out
+            .column(1)
+            .as_any()
+            .downcast_ref::<arrow_array::StringArray>()
+            .unwrap();
+        // Rows: (0,n0,1) (0,n0,2) (1,n1,2)
+        assert_eq!(a_id.values(), &[0, 0, 1]);
+        assert_eq!(b_id.values(), &[1, 2, 2]);
+        assert_eq!(a_name.value(0), "n0");
+        assert_eq!(a_name.value(1), "n0");
+        assert_eq!(a_name.value(2), "n1");
+    }
+
+    #[test]
+    fn test_expand_batch_no_neighbors_and_out_of_range() {
+        let csr = CsrIndexBuilder::new().with_num_vertices(2).build(); // no edges
+        let neighbor_field = Field::new("b__id", DataType::UInt64, true);
+        let out = expand_batch(&input_batch(), 0, &csr, &neighbor_field, &out_schema()).unwrap();
+        assert_eq!(out.num_rows(), 0);
+    }
+
+    #[test]
+    fn test_expand_batch_casts_source_id_from_int64() {
+        // Source id column is Int64 (not UInt64): must still work.
+        let schema = Arc::new(Schema::new(vec![Field::new("a__id", DataType::Int64, false)]));
+        let input = RecordBatch::try_new(
+            schema,
+            vec![Arc::new(arrow_array::Int64Array::from(vec![0i64, 1]))],
+        )
+        .unwrap();
+        let out_schema = Arc::new(Schema::new(vec![
+            Field::new("a__id", DataType::Int64, false),
+            Field::new("b__id", DataType::UInt64, true),
+        ]));
+        let csr = CsrIndexBuilder::new()
+            .with_num_vertices(2)
+            .add_edge(0, 1)
+            .build();
+        let neighbor_field = Field::new("b__id", DataType::UInt64, true);
+        let out = expand_batch(&input, 0, &csr, &neighbor_field, &out_schema).unwrap();
+        assert_eq!(out.num_rows(), 1);
+        let b_id = out.column(1).as_any().downcast_ref::<UInt64Array>().unwrap();
+        assert_eq!(b_id.values(), &[1]);
+    }
+}
diff --git a/crates/lance-graph/src/lance_native_planner/mod.rs b/crates/lance-graph/src/lance_native_planner/mod.rs
index 1a12aed1..570025a4 100644
--- a/crates/lance-graph/src/lance_native_planner/mod.rs
+++ b/crates/lance-graph/src/lance_native_planner/mod.rs
@@ -8,6 +8,7 @@
 //! crate compiles between tasks.
 
 mod direction;
+mod csr_expand;
 
 pub use direction::NativeDirection;
 

From fce3edb83ade55f7509b904666a01e4f95ffbd0a Mon Sep 17 00:00:00 2001
From: "jianjian.xie" <jja725@gmail.com>
Date: Mon, 22 Jun 2026 15:50:36 -0700
Subject: [PATCH 06/10] feat(native): RowMaterializer, LanceTakeNode/Exec,
 take_batch core

---
 .../src/lance_native_planner/mod.rs           |   1 +
 .../src/lance_native_planner/take.rs          | 386 ++++++++++++++++++
 2 files changed, 387 insertions(+)
 create mode 100644 crates/lance-graph/src/lance_native_planner/take.rs

diff --git a/crates/lance-graph/src/lance_native_planner/mod.rs b/crates/lance-graph/src/lance_native_planner/mod.rs
index 570025a4..06f1ae28 100644
--- a/crates/lance-graph/src/lance_native_planner/mod.rs
+++ b/crates/lance-graph/src/lance_native_planner/mod.rs
@@ -9,6 +9,7 @@
 
 mod direction;
 mod csr_expand;
+mod take;
 
 pub use direction::NativeDirection;
 
diff --git a/crates/lance-graph/src/lance_native_planner/take.rs b/crates/lance-graph/src/lance_native_planner/take.rs
new file mode 100644
index 00000000..fc57d21f
--- /dev/null
+++ b/crates/lance-graph/src/lance_native_planner/take.rs
@@ -0,0 +1,386 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The Lance Authors
+
+//! Native materialization: take target node columns by row id.
+//!
+//! `CsrExpandExec` produces target *row ids*; `LanceTakeExec` turns those into
+//! target *properties* via a `RowMaterializer`. Under the dense-ROWID model the
+//! in-memory materializer is a direct `arrow::compute::take` by offset — the
+//! concrete reason CSR beats a hash join.
+
+use std::any::Any;
+use std::fmt;
+use std::sync::Arc;
+
+use arrow::compute::{cast, take};
+use arrow_array::{ArrayRef, RecordBatch, UInt64Array};
+use arrow_schema::{DataType, Field, SchemaRef};
+use datafusion::common::{DFSchemaRef, Result as DFResult};
+use datafusion::execution::TaskContext;
+use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
+use datafusion::physical_expr::EquivalenceProperties;
+use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType};
+use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
+use datafusion::physical_plan::{
+    DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties,
+    SendableRecordBatchStream,
+};
+use futures::StreamExt;
+
+use crate::error::{GraphError, Result};
+
+/// Materializes rows of a target node table by row id.
+pub trait RowMaterializer: Send + Sync + fmt::Debug {
+    /// Take `columns` (raw, unqualified names) for the given `row_ids`.
+    /// The returned batch has one row per element of `row_ids`, columns in the
+    /// requested order, named by their raw names.
+    fn take(&self, row_ids: &UInt64Array, columns: &[String]) -> Result<RecordBatch>;
+}
+
+/// In-memory materializer over a fully-collected target node batch. Row id ==
+/// offset into the batch (dense-ROWID model).
+#[derive(Debug)]
+pub struct InMemoryMaterializer {
+    batch: RecordBatch,
+}
+
+impl InMemoryMaterializer {
+    pub fn new(batch: RecordBatch) -> Self {
+        Self { batch }
+    }
+}
+
+impl RowMaterializer for InMemoryMaterializer {
+    /// Gathers `columns` at offsets `row_ids` from the collected batch. An unknown
+    /// column or a row id past the end of the batch is returned as an error.
+    fn take(&self, row_ids: &UInt64Array, columns: &[String]) -> Result<RecordBatch> {
+        let schema = self.batch.schema();
+        let mut fields: Vec<Field> = Vec::with_capacity(columns.len());
+        let mut arrays: Vec<ArrayRef> = Vec::with_capacity(columns.len());
+        for name in columns {
+            let idx = schema.index_of(name).map_err(|_| GraphError::ExecutionError {
+                message: format!("take: target column '{}' not found", name),
+                location: snafu::Location::new(file!(), line!(), column!()),
+            })?;
+            // Bounds-checked: a row id past the end of the target batch becomes an
+            // error here; the unchecked kernel would panic mid-stream instead.
+            let opts = arrow::compute::TakeOptions { check_bounds: true };
+            let taken = take(self.batch.column(idx), row_ids, Some(opts)).map_err(|e| {
+                GraphError::ExecutionError {
+                    message: format!("take: failed on column '{}': {}", name, e),
+                    location: snafu::Location::new(file!(), line!(), column!()),
+                }
+            })?;
+            let src = schema.field(idx);
+            fields.push(Field::new(name, src.data_type().clone(), src.is_nullable()));
+            arrays.push(taken);
+        }
+        RecordBatch::try_new(Arc::new(arrow_schema::Schema::new(fields)), arrays).map_err(|e| {
+            GraphError::ExecutionError {
+                message: format!("take: build batch: {}", e),
+                location: snafu::Location::new(file!(), line!(), column!()),
+            }
+        })
+    }
+}
+
+/// Append materialized target columns to one input batch.
+///
+/// `row_id_idx` is the index of the row-id column in `input`. `take_cols` are
+/// the raw target column names to materialize, in the same order as the
+/// appended fields of `out_schema`. `out_schema` = `input.schema()` followed by
+/// the qualified materialized columns.
+pub(crate) fn take_batch(
+    input: &RecordBatch,
+    row_id_idx: usize,
+    materializer: &dyn RowMaterializer,
+    take_cols: &[String],
+    out_schema: &SchemaRef,
+) -> Result<RecordBatch> {
+    // Nothing to materialize: output is the input re-stamped with out_schema.
+    // (When take_cols is empty, out_schema == input.schema().)
+    if take_cols.is_empty() {
+        return RecordBatch::try_new(out_schema.clone(), input.columns().to_vec()).map_err(|e| {
+            GraphError::ExecutionError {
+                message: format!("take: build output batch: {}", e),
+                location: snafu::Location::new(file!(), line!(), column!()),
+            }
+        });
+    }
+
+    let ids_u64 = cast(input.column(row_id_idx), &DataType::UInt64).map_err(|e| {
+        GraphError::ExecutionError {
+            message: format!("take: cast row id to u64: {}", e),
+            location: snafu::Location::new(file!(), line!(), column!()),
+        }
+    })?;
+    // Arrow 56.x: cast(_, UInt64) always yields a plain UInt64Array.
+    let ids = ids_u64
+        .as_any()
+        .downcast_ref::<UInt64Array>()
+        .expect("cast to UInt64 yields UInt64Array");
+
+    let materialized = materializer.take(ids, take_cols)?;
+
+    let mut cols: Vec<ArrayRef> = input.columns().to_vec();
+    cols.extend(materialized.columns().iter().cloned());
+
+    RecordBatch::try_new(out_schema.clone(), cols).map_err(|e| GraphError::ExecutionError {
+        message: format!("take: build output batch: {}", e),
+        location: snafu::Location::new(file!(), line!(), column!()),
+    })
+}
+
+/// Logical extension node for materializing target columns via take().
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct LanceTakeNode {
+    /// Input subplan (a `CsrExpandNode`).
+    pub input: LogicalPlan,
+    /// Lowercased target node table name (to collect rows from).
+    pub target_table: String,
+    /// Qualified column in `input` holding the row ids (e.g. `b__id`).
+    pub row_id_column: String,
+    /// Raw (unqualified, lowercased) target columns to materialize, in output order.
+    pub take_cols: Vec<String>,
+    /// Output schema = input schema + qualified materialized columns.
+    pub schema: DFSchemaRef,
+}
+
+impl PartialOrd for LanceTakeNode {
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        (&self.target_table, &self.row_id_column, &self.take_cols).partial_cmp(&(
+            &other.target_table,
+            &other.row_id_column,
+            &other.take_cols,
+        ))
+    }
+}
+
+impl UserDefinedLogicalNodeCore for LanceTakeNode {
+    fn name(&self) -> &str {
+        "LanceTake"
+    }
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.input]
+    }
+    fn schema(&self) -> &DFSchemaRef {
+        &self.schema
+    }
+    fn expressions(&self) -> Vec<Expr> {
+        vec![]
+    }
+    fn fmt_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(
+            f,
+            "LanceTake: table={}, row_id={}, cols={:?}",
+            self.target_table, self.row_id_column, self.take_cols
+        )
+    }
+    fn with_exprs_and_inputs(
+        &self,
+        _exprs: Vec<Expr>,
+        mut inputs: Vec<LogicalPlan>,
+    ) -> DFResult<Self> {
+        Ok(Self {
+            input: inputs.remove(0),
+            ..self.clone()
+        })
+    }
+}
+
+/// Physical operator for `LanceTakeNode`; each input partition maps 1:1 to an output one.
+#[derive(Debug)]
+pub struct LanceTakeExec {
+    input: Arc<dyn ExecutionPlan>,
+    materializer: Arc<dyn RowMaterializer>,
+    row_id_idx: usize,
+    take_cols: Vec<String>,
+    out_schema: SchemaRef,
+    props: PlanProperties,
+}
+
+impl LanceTakeExec {
+    pub fn new(
+        input: Arc<dyn ExecutionPlan>,
+        materializer: Arc<dyn RowMaterializer>,
+        row_id_idx: usize,
+        take_cols: Vec<String>,
+        out_schema: SchemaRef,
+    ) -> Self {
+        let props = PlanProperties::new(
+            EquivalenceProperties::new(out_schema.clone()),
+            Partitioning::UnknownPartitioning(input.properties().partitioning.partition_count()),
+            EmissionType::Incremental,
+            Boundedness::Bounded,
+        );
+        Self {
+            input,
+            materializer,
+            row_id_idx,
+            take_cols,
+            out_schema,
+            props,
+        }
+    }
+}
+
+impl DisplayAs for LanceTakeExec {
+    fn fmt_as(&self, _t: DisplayFormatType, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "LanceTakeExec: cols={:?}", self.take_cols)
+    }
+}
+
+impl ExecutionPlan for LanceTakeExec {
+    fn name(&self) -> &str {
+        "LanceTakeExec"
+    }
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+    fn properties(&self) -> &PlanProperties {
+        &self.props
+    }
+    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
+        vec![&self.input]
+    }
+    fn with_new_children(
+        self: Arc<Self>,
+        children: Vec<Arc<dyn ExecutionPlan>>,
+    ) -> DFResult<Arc<dyn ExecutionPlan>> {
+        // Same operator, re-parented onto the replacement child.
+        let new_input = children[0].clone();
+        let materializer = self.materializer.clone();
+        let take_cols = self.take_cols.clone();
+        let schema = self.out_schema.clone();
+        let rebuilt = Self::new(new_input, materializer, self.row_id_idx, take_cols, schema);
+        Ok(Arc::new(rebuilt))
+    }
+    fn execute(
+        &self,
+        partition: usize,
+        context: Arc<TaskContext>,
+    ) -> DFResult<SendableRecordBatchStream> {
+        let upstream = self.input.execute(partition, context)?;
+        // Per-batch state is cloned out of `self` so the stream closure owns it.
+        let materializer = self.materializer.clone();
+        let take_cols = self.take_cols.clone();
+        let schema = self.out_schema.clone();
+        let row_id_idx = self.row_id_idx;
+        let taken = upstream.map(move |batch| {
+            // Upstream errors pass through; take failures become Execution errors.
+            let batch = batch?;
+            match take_batch(&batch, row_id_idx, materializer.as_ref(), &take_cols, &schema) {
+                Ok(out) => Ok(out),
+                Err(e) => Err(datafusion::error::DataFusionError::Execution(e.to_string())),
+            }
+        });
+        Ok(Box::pin(RecordBatchStreamAdapter::new(self.out_schema.clone(), taken)))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use arrow_array::StringArray;
+    use arrow_schema::Schema;
+
+    fn target_batch() -> RecordBatch {
+        // Target node table (person): id, name, age, with raw lowercased column names.
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("id", DataType::UInt64, false),
+            Field::new("name", DataType::Utf8, false),
+            Field::new("age", DataType::Int64, false),
+        ]));
+        RecordBatch::try_new(
+            schema,
+            vec![
+                Arc::new(UInt64Array::from(vec![0u64, 1, 2])),
+                Arc::new(StringArray::from(vec!["alice", "bob", "carol"])),
+                Arc::new(arrow_array::Int64Array::from(vec![30i64, 40, 50])),
+            ],
+        )
+        .unwrap()
+    }
+
+    #[test]
+    fn test_in_memory_materializer_take_subset() {
+        let m = InMemoryMaterializer::new(target_batch());
+        let ids = UInt64Array::from(vec![2u64, 0]); // requested out of table order
+        let out = m.take(&ids, &["name".to_string()]).unwrap();
+        assert_eq!(out.num_columns(), 1);
+        let names = out.column(0).as_any().downcast_ref::<StringArray>().unwrap();
+        assert_eq!(names.value(0), "carol"); // output follows the order of `ids`
+        assert_eq!(names.value(1), "alice");
+    }
+
+    #[test]
+    fn test_in_memory_materializer_missing_column_errors() {
+        let m = InMemoryMaterializer::new(target_batch());
+        let ids = UInt64Array::from(vec![0u64]);
+        assert!(m.take(&ids, &["nonexistent".to_string()]).is_err());
+    }
+
+    #[test]
+    fn test_take_batch_appends_qualified_columns() {
+        // Input columns: a__name, b__id (b__id carries the neighbor row id).
+        let in_schema = Arc::new(Schema::new(vec![
+            Field::new("a__name", DataType::Utf8, false),
+            Field::new("b__id", DataType::UInt64, true),
+        ]));
+        let input = RecordBatch::try_new(
+            in_schema,
+            vec![
+                Arc::new(StringArray::from(vec!["x", "y"])),
+                Arc::new(UInt64Array::from(vec![1u64, 2])),
+            ],
+        )
+        .unwrap();
+        // Expected output columns: a__name, b__id, b__name, b__age.
+        let out_schema = Arc::new(Schema::new(vec![
+            Field::new("a__name", DataType::Utf8, false),
+            Field::new("b__id", DataType::UInt64, true),
+            Field::new("b__name", DataType::Utf8, true),
+            Field::new("b__age", DataType::Int64, true),
+        ]));
+        let m = InMemoryMaterializer::new(target_batch());
+        let out = take_batch(
+            &input,
+            1, // position of b__id in the input schema
+            &m,
+            &["name".to_string(), "age".to_string()],
+            &out_schema,
+        )
+        .unwrap();
+        assert_eq!(out.num_rows(), 2);
+        let b_name = out.column(2).as_any().downcast_ref::<StringArray>().unwrap();
+        let b_age = out
+            .column(3)
+            .as_any()
+            .downcast_ref::<arrow_array::Int64Array>()
+            .unwrap();
+        assert_eq!(b_name.value(0), "bob"); // row id 1
+        assert_eq!(b_name.value(1), "carol"); // row id 2
+        assert_eq!(b_age.values(), &[40, 50]);
+    }
+
+    #[test]
+    fn test_take_batch_empty_take_cols_passthrough() {
+        // With no target columns to materialize, the output keeps the input's rows and columns.
+        let in_schema = Arc::new(Schema::new(vec![
+            Field::new("a__name", DataType::Utf8, false),
+            Field::new("b__id", DataType::UInt64, true),
+        ]));
+        let input = RecordBatch::try_new(
+            in_schema.clone(),
+            vec![
+                Arc::new(StringArray::from(vec!["x", "y"])),
+                Arc::new(UInt64Array::from(vec![1u64, 2])),
+            ],
+        )
+        .unwrap();
+        let m = InMemoryMaterializer::new(target_batch());
+        let out = take_batch(&input, 1, &m, &[], &in_schema).unwrap();
+        assert_eq!(out.num_rows(), 2);
+        assert_eq!(out.num_columns(), 2);
+    }
+}

From f2a1526fe2f06d0f54e5847c23b79eec991b4b7f Mon Sep 17 00:00:00 2001
From: "jianjian.xie" <jja725@gmail.com>
Date: Mon, 22 Jun 2026 16:00:47 -0700
Subject: [PATCH 07/10] feat(native): CsrExtensionPlanner + CsrQueryPlanner
 physical planning

---
 .../lance_native_planner/extension_planner.rs | 153 ++++++++++++++++++
 .../src/lance_native_planner/mod.rs           |   2 +
 2 files changed, 155 insertions(+)
 create mode 100644 crates/lance-graph/src/lance_native_planner/extension_planner.rs

diff --git a/crates/lance-graph/src/lance_native_planner/extension_planner.rs b/crates/lance-graph/src/lance_native_planner/extension_planner.rs
new file mode 100644
index 00000000..ce569402
--- /dev/null
+++ b/crates/lance-graph/src/lance_native_planner/extension_planner.rs
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The Lance Authors
+
+//! Physical planning for the native CSR extension nodes.
+//!
+//! `CsrQueryPlanner` is registered on the execution `SessionContext`. It runs
+//! the `DefaultPhysicalPlanner` with `CsrExtensionPlanner`, which builds the
+//! `CsrIndex` (from the edge table) and the `InMemoryMaterializer` (from the
+//! target node table) at physical-planning time.
+
+use std::sync::Arc;
+
+use arrow::compute::concat_batches;
+use arrow_schema::Field;
+use async_trait::async_trait;
+use datafusion::common::Result as DFResult;
+use datafusion::error::DataFusionError;
+use datafusion::execution::context::{QueryPlanner, SessionContext, SessionState};
+use datafusion::logical_expr::{LogicalPlan, UserDefinedLogicalNode};
+use datafusion::physical_plan::ExecutionPlan;
+use datafusion::physical_planner::{DefaultPhysicalPlanner, ExtensionPlanner, PhysicalPlanner};
+
+use super::csr_expand::{CsrExpandExec, CsrExpandNode};
+use super::direction::NativeDirection;
+use super::take::{InMemoryMaterializer, LanceTakeExec, LanceTakeNode};
+use crate::csr_index::CsrIndexBuilder;
+
+/// Read every row of the registered table `table` into one `RecordBatch`.
+///
+/// Phase 2 constructs the CSR / materializer eagerly during physical planning,
+/// so the complete edge / target table ends up in memory here. An empty table
+/// yields an empty batch with the correct schema, and therefore an empty CSR.
+async fn collect_table(
+    session_state: &SessionState,
+    table: &str,
+) -> DFResult<arrow_array::RecordBatch> {
+    let session = SessionContext::new_with_state(session_state.clone());
+    let frame = session.table(table).await?;
+    let arrow_schema = frame.schema().inner().clone();
+    let merged = concat_batches(&arrow_schema, &frame.collect().await?);
+    merged.map_err(|e| DataFusionError::Execution(e.to_string()))
+}
+
+/// Lowers the `CsrExpandNode` / `LanceTakeNode` logical extensions to physical operators.
+#[derive(Debug)]
+pub struct CsrExtensionPlanner;
+
+#[async_trait]
+impl ExtensionPlanner for CsrExtensionPlanner {
+    async fn plan_extension(
+        &self,
+        _planner: &dyn PhysicalPlanner,
+        node: &dyn UserDefinedLogicalNode,
+        _logical_inputs: &[&LogicalPlan],
+        physical_inputs: &[Arc<dyn ExecutionPlan>],
+        session_state: &SessionState,
+    ) -> DFResult<Option<Arc<dyn ExecutionPlan>>> {
+        if let Some(expand) = node.as_any().downcast_ref::<CsrExpandNode>() {
+            let child = match physical_inputs.first() {
+                Some(plan) => plan.clone(),
+                None => {
+                    return Err(DataFusionError::Internal(
+                        "CsrExpandNode: expected 1 physical input, got 0".to_string(),
+                    ));
+                }
+            };
+
+            // The CSR is built over the edge table; an Incoming expand passes the
+            // two endpoint columns in swapped order.
+            let edge_batch = collect_table(session_state, &expand.rel_type).await?;
+            let (from_col, to_col) = match expand.direction {
+                NativeDirection::Outgoing => (&expand.src_field, &expand.dst_field),
+                NativeDirection::Incoming => (&expand.dst_field, &expand.src_field),
+            };
+            let csr = CsrIndexBuilder::new()
+                .add_edges_from_batch_with_columns(&edge_batch, from_col, to_col)
+                .map_err(|e| DataFusionError::Execution(e.to_string()))?
+                .build();
+            let csr = Arc::new(csr);
+            let child_schema = child.schema();
+            let source_id_idx = match child_schema.index_of(&expand.source_id_column) {
+                Ok(idx) => idx,
+                Err(e) => {
+                    return Err(DataFusionError::Execution(format!(
+                        "CsrExpand: source id column '{}' not found in input: {}",
+                        expand.source_id_column, e
+                    )));
+                }
+            };
+            let neighbor_field =
+                Field::new(&expand.neighbor_column, expand.neighbor_data_type.clone(), true);
+            let out_schema = expand.schema.inner().clone();
+            let exec = CsrExpandExec::new(child, csr, source_id_idx, neighbor_field, out_schema);
+            return Ok(Some(Arc::new(exec)));
+        }
+
+        if let Some(take) = node.as_any().downcast_ref::<LanceTakeNode>() {
+            let child = match physical_inputs.first() {
+                Some(plan) => plan.clone(),
+                None => {
+                    return Err(DataFusionError::Internal(
+                        "LanceTakeNode: expected 1 physical input, got 0".to_string(),
+                    ));
+                }
+            };
+
+            // The whole target node table is collected and handed to the materializer.
+            let target_rows = collect_table(session_state, &take.target_table).await?;
+            let materializer = Arc::new(InMemoryMaterializer::new(target_rows));
+            let child_schema = child.schema();
+            let row_id_idx = match child_schema.index_of(&take.row_id_column) {
+                Ok(idx) => idx,
+                Err(e) => {
+                    return Err(DataFusionError::Execution(format!(
+                        "LanceTake: row id column '{}' not found in input: {}",
+                        take.row_id_column, e
+                    )));
+                }
+            };
+            let cols = take.take_cols.clone();
+            let out_schema = take.schema.inner().clone();
+            let exec = LanceTakeExec::new(child, materializer, row_id_idx, cols, out_schema);
+            return Ok(Some(Arc::new(exec)));
+        }
+
+        Ok(None)
+    }
+}
+
+/// `QueryPlanner` that routes physical planning through `CsrExtensionPlanner`.
+#[derive(Debug, Default)]
+pub struct CsrQueryPlanner;
+
+impl CsrQueryPlanner {
+    pub fn new() -> Self {
+        CsrQueryPlanner
+    }
+}
+
+#[async_trait]
+impl QueryPlanner for CsrQueryPlanner {
+    async fn create_physical_plan(
+        &self,
+        logical_plan: &LogicalPlan,
+        session_state: &SessionState,
+    ) -> DFResult<Arc<dyn ExecutionPlan>> {
+        // Stock DataFusion planning, extended with the lowering for the CSR extension nodes.
+        let delegate =
+            DefaultPhysicalPlanner::with_extension_planners(vec![Arc::new(CsrExtensionPlanner)]);
+        let physical = delegate.create_physical_plan(logical_plan, session_state);
+        physical.await
+    }
+}
diff --git a/crates/lance-graph/src/lance_native_planner/mod.rs b/crates/lance-graph/src/lance_native_planner/mod.rs
index 06f1ae28..f8493480 100644
--- a/crates/lance-graph/src/lance_native_planner/mod.rs
+++ b/crates/lance-graph/src/lance_native_planner/mod.rs
@@ -10,8 +10,10 @@
 mod direction;
 mod csr_expand;
 mod take;
+mod extension_planner;
 
 pub use direction::NativeDirection;
+pub use extension_planner::CsrQueryPlanner;
 
 use crate::config::GraphConfig;
 use crate::datafusion_planner::GraphPhysicalPlanner;

From f622fb4c2577f2d304f04f039059dccd96c74718 Mon Sep 17 00:00:00 2001
From: "jianjian.xie" <jja725@gmail.com>
Date: Mon, 22 Jun 2026 16:14:23 -0700
Subject: [PATCH 08/10] feat(native): LanceNativePlanner single-hop lowering
 with DataFusion fallback

---
 .../lance-graph/src/datafusion_planner/mod.rs |   9 +-
 .../src/lance_native_planner/mod.rs           | 501 +++++++++++++++++-
 2 files changed, 488 insertions(+), 22 deletions(-)

diff --git a/crates/lance-graph/src/datafusion_planner/mod.rs b/crates/lance-graph/src/datafusion_planner/mod.rs
index dfb4bb5b..8ac64671 100644
--- a/crates/lance-graph/src/datafusion_planner/mod.rs
+++ b/crates/lance-graph/src/datafusion_planner/mod.rs
@@ -17,14 +17,14 @@
 pub mod analysis;
 mod builder;
 mod config_helpers;
-mod expression;
+pub(crate) mod expression;
 mod join_ops;
 mod scan_ops;
 mod udf;
 pub mod vector_ops;
 
 #[cfg(test)]
-mod test_fixtures;
+pub(crate) mod test_fixtures;
 
 // Re-export public types
 pub use analysis::{PlanningContext, QueryAnalysis, RelationshipInstance};
@@ -62,6 +62,11 @@ impl DataFusionPlanner {
         }
     }
 
+    /// Borrow the graph source catalog, or `None` if none is set (used by the native planner).
+    pub(crate) fn catalog_ref(&self) -> Option<&Arc<dyn GraphSourceCatalog>> {
+        self.catalog.as_ref()
+    }
+
     /// Helper to convert DataFusion builder errors into GraphError::PlanError with context
     pub(crate) fn plan_error<E: std::fmt::Display>(
         &self,
diff --git a/crates/lance-graph/src/lance_native_planner/mod.rs b/crates/lance-graph/src/lance_native_planner/mod.rs
index f8493480..07fbc7f6 100644
--- a/crates/lance-graph/src/lance_native_planner/mod.rs
+++ b/crates/lance-graph/src/lance_native_planner/mod.rs
@@ -1,46 +1,507 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The Lance Authors
 
-//! Lance Native physical planner (placeholder)
+//! Lance Native physical planner.
 //!
-//! Rewritten in a later task to lower single-hop `Expand` onto CSR-backed
-//! extension nodes. For now it keeps the original placeholder behavior so the
-//! crate compiles between tasks.
+//! Lowers a supported single-hop `Expand` onto CSR-backed extension nodes
+//! (`CsrExpandNode` + `LanceTakeNode`). Any plan it cannot serve natively is
+//! delegated wholesale to `DataFusionPlanner`, so `LanceNative` execution is
+//! always correct on valid Cypher — it simply uses joins when CSR cannot help.
 
-mod direction;
 mod csr_expand;
-mod take;
+mod direction;
 mod extension_planner;
+mod take;
 
 pub use direction::NativeDirection;
 pub use extension_planner::CsrQueryPlanner;
 
-use crate::config::GraphConfig;
-use crate::datafusion_planner::GraphPhysicalPlanner;
-use crate::error::Result;
-use crate::logical_plan::LogicalOperator;
-use datafusion::common::DFSchema;
-use datafusion::logical_expr::{EmptyRelation, LogicalPlan};
 use std::sync::Arc;
 
-/// Placeholder Lance-native planner
+use arrow_schema::{Field, Schema};
+use datafusion::common::DFSchema;
+use datafusion::logical_expr::{Extension, LogicalPlan, LogicalPlanBuilder};
+
+use crate::ast::RelationshipDirection;
+use crate::case_insensitive::qualify_column;
+use crate::config::GraphConfig;
+use crate::datafusion_planner::expression::{to_df_boolean_expr, to_df_value_expr};
+use crate::datafusion_planner::{analysis, DataFusionPlanner, GraphPhysicalPlanner, PlanningContext};
+use crate::error::{GraphError, Result};
+use crate::logical_plan::{LogicalOperator, ProjectionItem, SortItem};
+
+use csr_expand::CsrExpandNode;
+use take::LanceTakeNode;
+
+/// Lance-native planner: lowers a single-hop expand onto CSR nodes, else defers to DataFusion.
 pub struct LanceNativePlanner {
-    #[allow(dead_code)]
     config: GraphConfig,
+    df: DataFusionPlanner, // fallback planner; its scan/project builders are reused natively
 }
 
 impl LanceNativePlanner {
     pub fn new(config: GraphConfig) -> Self {
-        Self { config }
+        Self {
+            df: DataFusionPlanner::new(config.clone()), // delegate gets its own config copy
+            config,
+        }
+    }
+
+    pub fn with_catalog(
+        config: GraphConfig,
+        catalog: Arc<dyn lance_graph_catalog::GraphSourceCatalog>, // read by native expand
+    ) -> Self {
+        Self {
+            df: DataFusionPlanner::with_catalog(config.clone(), catalog),
+            config,
+        }
     }
 }
 
 impl GraphPhysicalPlanner for LanceNativePlanner {
-    fn plan(&self, _logical_plan: &LogicalOperator) -> Result<LogicalPlan> {
-        let schema = Arc::new(DFSchema::empty());
-        Ok(LogicalPlan::EmptyRelation(EmptyRelation {
-            produce_one_row: false,
-            schema,
+    fn plan(&self, logical_plan: &LogicalOperator) -> Result<LogicalPlan> {
+        // Native expand needs a catalog; without one, or for unsupported shapes, use joins.
+        if self.df.catalog_ref().is_none() || !can_plan_natively(logical_plan) {
+            return self.df.plan(logical_plan);
+        }
+        let analysis = analysis::analyze(logical_plan)?;
+        self.build_native(&mut PlanningContext::new(&analysis), logical_plan)
+    }
+}
+
+impl LanceNativePlanner {
+    /// Recursively lower `op` into a DataFusion `LogicalPlan`, using the CSR
+    /// extension nodes for `Expand` and ordinary DataFusion operators elsewhere.
+    fn build_native(
+        &self,
+        ctx: &mut PlanningContext,
+        op: &LogicalOperator,
+    ) -> Result<LogicalPlan> {
+        match op {
+            LogicalOperator::ScanByLabel {
+                variable,
+                label,
+                properties,
+            } => self.df.build_scan(ctx, variable, label, properties),
+
+            LogicalOperator::Filter { input, predicate } => {
+                let builder = LogicalPlanBuilder::from(self.build_native(ctx, input)?);
+                let filtered = builder
+                    .filter(to_df_boolean_expr(predicate))
+                    .map_err(|e| self.plan_err("filter", e))?;
+                filtered.build().map_err(|e| self.plan_err("filter build", e))
+            }
+
+            LogicalOperator::Project { input, projections } => {
+                let projected_input = self.build_native(ctx, input)?;
+                self.build_project_on(projected_input, projections)
+            }
+
+            LogicalOperator::Sort { input, sort_items } => {
+                let sorted_input = self.build_native(ctx, input)?;
+                self.build_sort_on(sorted_input, sort_items)
+            }
+
+            // Limit: skip nothing, fetch at most `count` rows.
+            LogicalOperator::Limit { input, count } => {
+                let builder = LogicalPlanBuilder::from(self.build_native(ctx, input)?);
+                let limited = builder
+                    .limit(0, Some(*count as usize))
+                    .map_err(|e| self.plan_err("limit", e))?;
+                limited.build().map_err(|e| self.plan_err("limit build", e))
+            }
+
+            // Offset: skip `offset` rows, with no bound on how many are fetched after.
+            LogicalOperator::Offset { input, offset } => {
+                let builder = LogicalPlanBuilder::from(self.build_native(ctx, input)?);
+                let skipped = builder
+                    .limit(*offset as usize, None)
+                    .map_err(|e| self.plan_err("offset", e))?;
+                skipped.build().map_err(|e| self.plan_err("offset build", e))
+            }
+
+            LogicalOperator::Distinct { input } => {
+                let builder = LogicalPlanBuilder::from(self.build_native(ctx, input)?);
+                let deduped = builder
+                    .distinct()
+                    .map_err(|e| self.plan_err("distinct", e))?;
+                deduped.build().map_err(|e| self.plan_err("distinct build", e))
+            }
+
+            LogicalOperator::Expand {
+                input,
+                source_variable,
+                target_variable,
+                target_label,
+                relationship_types,
+                direction,
+                ..
+            } => self.build_expand_native(
+                ctx,
+                input,
+                source_variable,
+                target_variable,
+                target_label,
+                relationship_types,
+                direction,
+            ),
+
+            // `plan` only enters here after `can_plan_natively`, so this arm is a safety net.
+            other => Err(GraphError::PlanError {
+                message: format!("native planner reached unsupported operator: {:?}", other),
+                location: snafu::Location::new(file!(), line!(), column!()),
+            }),
+        }
+    }
+
+    #[allow(clippy::too_many_arguments)] // one argument per destructured `Expand` field
+    fn build_expand_native(
+        &self,
+        ctx: &mut PlanningContext,
+        input: &LogicalOperator,
+        source_variable: &str,
+        target_variable: &str,
+        target_label: &str,
+        relationship_types: &[String],
+        direction: &RelationshipDirection,
+    ) -> Result<LogicalPlan> {
+        let source_plan = self.build_native(ctx, input)?;
+
+        let rel_type = &relationship_types[0]; // `walk_supported` admits exactly one type
+        let rel_map = self
+            .config
+            .get_relationship_mapping(rel_type)
+            .ok_or_else(|| GraphError::ConfigError {
+                message: format!("No relationship mapping for '{}'", rel_type),
+                location: snafu::Location::new(file!(), line!(), column!()),
+            })?;
+
+        let src_label = ctx
+            .analysis
+            .var_to_label
+            .get(source_variable)
+            .ok_or_else(|| GraphError::PlanError {
+                message: format!("No label for source variable '{}'", source_variable),
+                location: snafu::Location::new(file!(), line!(), column!()),
+            })?;
+        let src_node = self
+            .config
+            .get_node_mapping(src_label)
+            .ok_or_else(|| GraphError::ConfigError {
+                message: format!("No node mapping for label '{}'", src_label),
+                location: snafu::Location::new(file!(), line!(), column!()),
+            })?;
+        let tgt_node = self
+            .config
+            .get_node_mapping(target_label)
+            .ok_or_else(|| GraphError::ConfigError {
+                message: format!("No node mapping for label '{}'", target_label),
+                location: snafu::Location::new(file!(), line!(), column!()),
+            })?;
+
+        let catalog = self.df.catalog_ref().ok_or_else(|| GraphError::ConfigError {
+            message: "LanceNativePlanner requires a catalog for native expand".to_string(),
+            location: snafu::Location::new(file!(), line!(), column!()),
+        })?;
+        let tgt_source = catalog.node_source(target_label).ok_or_else(|| {
+            GraphError::ConfigError {
+                message: format!("No table source for target label '{}'", target_label),
+                location: snafu::Location::new(file!(), line!(), column!()),
+            }
+        })?;
+        let tgt_arrow = tgt_source.schema(); // schema of the target node table
+
+        let native_direction = match direction {
+            RelationshipDirection::Outgoing => NativeDirection::Outgoing,
+            RelationshipDirection::Incoming => NativeDirection::Incoming,
+            RelationshipDirection::Undirected => { // defensive: `walk_supported` rejects this
+                return Err(GraphError::PlanError {
+                    message: "undirected expand is not natively supported".to_string(),
+                    location: snafu::Location::new(file!(), line!(), column!()),
+                });
+            }
+        };
+
+        let source_id_column = qualify_column(source_variable, &src_node.id_field);
+        let neighbor_column = qualify_column(target_variable, &tgt_node.id_field);
+        let id_lower = tgt_node.id_field.to_lowercase();
+        let neighbor_data_type = tgt_arrow
+            .field_with_name(&id_lower)
+            .map_err(|e| GraphError::ConfigError {
+                message: format!(
+                    "target id field '{}' not found in '{}': {}",
+                    tgt_node.id_field, target_label, e
+                ),
+                location: snafu::Location::new(file!(), line!(), column!()),
+            })?
+            .data_type()
+            .clone();
+
+        // CsrExpandNode output schema = source schema + one nullable neighbor-id column.
+        let src_arrow = source_plan.schema().inner();
+        let mut expand_fields: Vec<Field> =
+            src_arrow.fields().iter().map(|f| f.as_ref().clone()).collect();
+        expand_fields.push(Field::new(&neighbor_column, neighbor_data_type.clone(), true));
+        let expand_arrow = Schema::new(expand_fields);
+        // Fields are intentionally unqualified: names are already the pre-qualified
+        // `{var}__{col}` strings the reused DataFusion builders resolve by name.
+        let expand_schema = Arc::new(
+            DFSchema::try_from(expand_arrow).map_err(|e| self.plan_err("expand schema", e))?,
+        );
+
+        let expand_node = CsrExpandNode {
+            input: source_plan,
+            rel_type: rel_type.to_lowercase(),
+            src_field: rel_map.source_id_field.to_lowercase(),
+            dst_field: rel_map.target_id_field.to_lowercase(),
+            direction: native_direction,
+            source_id_column,
+            neighbor_column: neighbor_column.clone(),
+            neighbor_data_type,
+            schema: expand_schema,
+        };
+        let expand_plan = LogicalPlan::Extension(Extension {
+            node: Arc::new(expand_node),
+        });
+
+        // LanceTakeNode: materialize every target column except the id field (lowercased names).
+        let take_cols: Vec<String> = tgt_arrow
+            .fields()
+            .iter()
+            .map(|f| f.name().to_lowercase())
+            .filter(|n| n != &id_lower)
+            .collect();
+
+        let mut take_fields: Vec<Field> = expand_plan
+            .schema()
+            .inner()
+            .fields()
+            .iter()
+            .map(|f| f.as_ref().clone())
+            .collect();
+        for raw in &take_cols {
+            let f = tgt_arrow
+                .field_with_name(raw)
+                .map_err(|e| self.plan_err("target field", e))?;
+            take_fields.push(Field::new(
+                qualify_column(target_variable, raw),
+                f.data_type().clone(),
+                true, // appended target columns are always declared nullable
+            ));
+        }
+        let take_arrow = Schema::new(take_fields);
+        let take_schema =
+            Arc::new(DFSchema::try_from(take_arrow).map_err(|e| self.plan_err("take schema", e))?);
+
+        let take_node = LanceTakeNode {
+            input: expand_plan,
+            target_table: target_label.to_lowercase(),
+            row_id_column: neighbor_column,
+            take_cols,
+            schema: take_schema,
+        };
+        Ok(LogicalPlan::Extension(Extension {
+            node: Arc::new(take_node),
         }))
     }
+
+    /// Project `input`, picking the aggregate-aware builder when any item aggregates.
+    fn build_project_on(
+        &self,
+        input: LogicalPlan,
+        projections: &[ProjectionItem],
+    ) -> Result<LogicalPlan> {
+        use crate::datafusion_planner::expression::contains_aggregate;
+        let aggregating = projections.iter().any(|p| contains_aggregate(&p.expression));
+        if aggregating {
+            self.df.build_project_with_aggregates(input, projections)
+        } else {
+            self.df.build_simple_project(input, projections)
+        }
+    }
+
+    /// Apply an ORDER BY over `input`. Every key is emitted with `nulls_first: true`,
+    /// for ascending and descending items alike.
+    fn build_sort_on(&self, input: LogicalPlan, sort_items: &[SortItem]) -> Result<LogicalPlan> {
+        use crate::ast::SortDirection;
+        use datafusion::logical_expr::SortExpr;
+
+        let mut keys: Vec<SortExpr> = Vec::with_capacity(sort_items.len());
+        for item in sort_items {
+            keys.push(SortExpr {
+                expr: to_df_value_expr(&item.expression),
+                asc: matches!(item.direction, SortDirection::Ascending),
+                nulls_first: true,
+            });
+        }
+
+        let sorted = LogicalPlanBuilder::from(input)
+            .sort(keys)
+            .map_err(|e| self.plan_err("sort", e))?;
+        sorted.build().map_err(|e| self.plan_err("sort build", e))
+    }
+
+    fn plan_err<E: std::fmt::Display>(&self, what: &str, e: E) -> GraphError {
+        GraphError::PlanError {
+            message: format!("native {}: {}", what, e), // `what` names the failing step
+            location: snafu::Location::new(file!(), line!(), column!()), // helper's own line
+        }
+    }
+}
+
+/// True iff the native planner can serve `op`: every operator in the tree is
+/// supported and the tree contains exactly one `Expand`.
+fn can_plan_natively(op: &LogicalOperator) -> bool {
+    let mut expand_count = 0usize;
+    let all_supported = walk_supported(op, &mut expand_count);
+    // Plans with zero expands, or with more than one, are left to the fallback planner.
+    all_supported && expand_count == 1
+}
+
+/// Check that every operator under `op` is one the native path can lower,
+/// adding the number of `Expand` nodes visited to `expands`. Returns false on
+/// the first unsupported operator or expand shape. Enforcing exactly one
+/// expand is left to the caller (`expands == 1`); meeting a second expand does
+/// not by itself stop the walk.
+fn walk_supported(op: &LogicalOperator, expands: &mut usize) -> bool {
+    match op {
+        // Leaf of the walk.
+        LogicalOperator::ScanByLabel { .. } => true,
+        // Single-input operators are supported whenever their input is.
+        LogicalOperator::Filter { input, .. }
+        | LogicalOperator::Project { input, .. }
+        | LogicalOperator::Sort { input, .. }
+        | LogicalOperator::Limit { input, .. }
+        | LogicalOperator::Offset { input, .. }
+        | LogicalOperator::Distinct { input } => walk_supported(input, expands),
+        LogicalOperator::Expand {
+            input,
+            relationship_types,
+            direction,
+            properties,
+            target_properties,
+            ..
+        } => {
+            // Counted before validation, so a rejected expand still increments `expands`.
+            *expands += 1;
+            let single_type = relationship_types.len() == 1;
+            let directed = !matches!(direction, RelationshipDirection::Undirected);
+            // Inline relationship/target-node property filters are not handled by
+            // the native path yet; such plans go to the DataFusion join planner.
+            let unfiltered = properties.is_empty() && target_properties.is_empty();
+            let shape_ok = single_type && directed && unfiltered;
+            shape_ok && walk_supported(input, expands)
+        }
+        // Never lowered natively.
+        LogicalOperator::VariableLengthExpand { .. }
+        | LogicalOperator::Join { .. }
+        | LogicalOperator::Unwind { .. } => false,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::ast::{PropertyRef, ValueExpression};
+    use crate::datafusion_planner::test_fixtures::{make_catalog, person_knows_config, person_scan};
+    use crate::logical_plan::{LogicalOperator, ProjectionItem};
+
+    fn knows_expand(direction: RelationshipDirection) -> LogicalOperator {
+        LogicalOperator::Expand {
+            input: Box::new(person_scan("a")),
+            source_variable: "a".to_string(),
+            target_variable: "b".to_string(),
+            target_label: "Person".to_string(),
+            relationship_types: vec!["KNOWS".to_string()],
+            direction,
+            relationship_variable: None,
+            properties: Default::default(), // no inline relationship filters
+            target_properties: Default::default(), // no inline target-node filters
+        }
+    }
+
+    #[test]
+    fn test_can_plan_natively_single_hop() {
+        let plan = LogicalOperator::Project {
+            input: Box::new(knows_expand(RelationshipDirection::Outgoing)),
+            projections: vec![ProjectionItem {
+                expression: ValueExpression::Property(PropertyRef::new("b", "name")),
+                alias: None,
+            }],
+        };
+        assert!(can_plan_natively(&plan));
+    }
+
+    #[test]
+    fn test_cannot_plan_undirected_or_multitype() {
+        assert!(!can_plan_natively(&knows_expand(
+            RelationshipDirection::Undirected
+        )));
+        let mut multi = knows_expand(RelationshipDirection::Outgoing);
+        if let LogicalOperator::Expand {
+            relationship_types, ..
+        } = &mut multi
+        {
+            relationship_types.push("LIKES".to_string()); // second type makes it multi-type
+        }
+        assert!(!can_plan_natively(&multi));
+    }
+
+    #[test]
+    fn test_cannot_plan_zero_expands() {
+        assert!(!can_plan_natively(&person_scan("a")));
+    }
+
+    #[test]
+    fn test_native_plan_contains_extension_nodes() {
+        let plan = LogicalOperator::Project {
+            input: Box::new(knows_expand(RelationshipDirection::Outgoing)),
+            projections: vec![ProjectionItem {
+                expression: ValueExpression::Property(PropertyRef::new("b", "name")),
+                alias: None,
+            }],
+        };
+        let planner = LanceNativePlanner::with_catalog(person_knows_config(), make_catalog());
+        let df_plan = planner.plan(&plan).unwrap();
+        let s = format!("{:?}", df_plan); // node names are looked up in the Debug rendering
+        assert!(s.contains("CsrExpand"), "missing CsrExpand: {}", s);
+        assert!(s.contains("LanceTake"), "missing LanceTake: {}", s);
+    }
+
+    #[test]
+    fn test_unsupported_falls_back_to_join() {
+        let vlexpand = LogicalOperator::VariableLengthExpand {
+            input: Box::new(person_scan("a")),
+            source_variable: "a".into(),
+            target_variable: "b".into(),
+            relationship_types: vec!["KNOWS".into()],
+            direction: RelationshipDirection::Outgoing,
+            relationship_variable: None,
+            min_length: Some(1),
+            max_length: Some(2),
+            target_properties: Default::default(),
+        };
+        let planner = LanceNativePlanner::with_catalog(person_knows_config(), make_catalog());
+        let df_plan = planner.plan(&vlexpand).unwrap();
+        let s = format!("{:?}", df_plan);
+        assert!(!s.contains("CsrExpand"), "should not be native: {}", s);
+    }
+
+    #[test]
+    fn test_cannot_plan_expand_with_inline_property_filters() {
+        use crate::ast::PropertyValue;
+        // An inline filter on the target node must force the fallback.
+        let mut op = knows_expand(RelationshipDirection::Outgoing);
+        if let LogicalOperator::Expand { target_properties, .. } = &mut op {
+            target_properties.insert("active".into(), PropertyValue::Boolean(true));
+        }
+        assert!(!can_plan_natively(&op));
+
+        // An inline filter on the relationship must force the fallback as well.
+        let mut op2 = knows_expand(RelationshipDirection::Outgoing);
+        if let LogicalOperator::Expand { properties, .. } = &mut op2 {
+            properties.insert("since".into(), PropertyValue::Integer(2020));
+        }
+        assert!(!can_plan_natively(&op2));
+    }
 }

From 2df8fccefad5e3249c95c9dc6f6845008efb228a Mon Sep 17 00:00:00 2001
From: "jianjian.xie" <jja725@gmail.com>
Date: Mon, 22 Jun 2026 16:31:07 -0700
Subject: [PATCH 09/10] feat(native): wire LanceNative execution strategy + e2e
 parity tests

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
---
 crates/lance-graph/src/query.rs               | 103 ++++++++++-
 .../tests/test_lance_native_expand.rs         | 160 ++++++++++++++++++
 2 files changed, 254 insertions(+), 9 deletions(-)
 create mode 100644 crates/lance-graph/tests/test_lance_native_expand.rs

diff --git a/crates/lance-graph/src/query.rs b/crates/lance-graph/src/query.rs
index e63e2a6a..eb5d8ff6 100644
--- a/crates/lance-graph/src/query.rs
+++ b/crates/lance-graph/src/query.rs
@@ -233,10 +233,7 @@ impl CypherQuery {
         let strategy = strategy.unwrap_or_default();
         match strategy {
             ExecutionStrategy::DataFusion => self.execute_datafusion(datasets).await,
-            ExecutionStrategy::LanceNative => Err(GraphError::UnsupportedFeature {
-                feature: "Lance native execution strategy is not yet implemented".to_string(),
-                location: snafu::Location::new(file!(), line!(), column!()),
-            }),
+            ExecutionStrategy::LanceNative => self.execute_lance_native(datasets).await,
         }
     }
 
@@ -328,7 +325,7 @@ impl CypherQuery {
 
         // Build catalog and context from datasets
         let (catalog, ctx) = self
-            .build_catalog_and_context_from_datasets(datasets)
+            .build_catalog_and_context_from_datasets(datasets, false)
             .await?;
 
         // Delegate to the internal explain method
@@ -366,7 +363,7 @@ impl CypherQuery {
 
         // Build catalog and context from datasets using the helper
         let (catalog, ctx) = self
-            .build_catalog_and_context_from_datasets(datasets)
+            .build_catalog_and_context_from_datasets(datasets, false)
             .await?;
 
         // Generate Logical Plan
@@ -596,7 +593,7 @@ impl CypherQuery {
 
         // Build catalog and context from datasets
         let (catalog, ctx) = self
-            .build_catalog_and_context_from_datasets(datasets)
+            .build_catalog_and_context_from_datasets(datasets, false)
             .await?;
 
         // Delegate to common execution logic
@@ -604,10 +601,87 @@ impl CypherQuery {
             .await
     }
 
+    /// Execute using the Lance native CSR strategy with in-memory datasets.
+    ///
+    /// Installs `CsrQueryPlanner` on the session so that CSR extension nodes in the
+    /// logical plan are lowered to `CsrExpandExec` / `LanceTakeExec` at physical-plan
+    /// time. Unsupported plans (e.g. variable-length paths) fall back automatically to
+    /// the DataFusion join path via `LanceNativePlanner`'s internal delegate.
+    async fn execute_lance_native(
+        &self,
+        datasets: HashMap<String, arrow::record_batch::RecordBatch>,
+    ) -> Result<arrow::record_batch::RecordBatch> {
+        use arrow::compute::concat_batches;
+        use std::sync::Arc;
+
+        // Build catalog and a CSR-enabled session context.
+        let (catalog, ctx) = self
+            .build_catalog_and_context_from_datasets(datasets, true)
+            .await?;
+
+        // Lower the graph logical plan through LanceNativePlanner.
+        let df_logical_plan = self.create_logical_plans_native(Arc::new(catalog))?;
+
+        let df = ctx
+            .execute_logical_plan(df_logical_plan)
+            .await
+            .map_err(|e| GraphError::ExecutionError {
+                message: format!("Failed to execute native plan: {}", e),
+                location: snafu::Location::new(file!(), line!(), column!()),
+            })?;
+        let result_schema = df.schema().inner().clone();
+        let batches = df.collect().await.map_err(|e| GraphError::ExecutionError {
+            message: format!("Failed to collect native results: {}", e),
+            location: snafu::Location::new(file!(), line!(), column!()),
+        })?;
+        if batches.is_empty() {
+            return Ok(arrow::record_batch::RecordBatch::new_empty(result_schema));
+        }
+        concat_batches(&result_schema, &batches).map_err(|e| GraphError::ExecutionError {
+            message: format!("Failed to concat native results: {}", e),
+            location: snafu::Location::new(file!(), line!(), column!()),
+        })
+    }
+
+    /// Build a DataFusion `LogicalPlan` via `LanceNativePlanner`.
+    ///
+    /// Follows the same phases as `create_logical_plans`, except that phase 3 uses
+    /// `LanceNativePlanner` instead of `DataFusionPlanner`. Unsupported patterns (e.g.
+    /// variable-length paths) are transparently delegated to the DataFusion join planner.
+    fn create_logical_plans_native(
+        &self,
+        catalog: std::sync::Arc<dyn lance_graph_catalog::GraphSourceCatalog>,
+    ) -> Result<datafusion::logical_expr::LogicalPlan> {
+        use crate::datafusion_planner::GraphPhysicalPlanner;
+        use crate::lance_native_planner::LanceNativePlanner;
+        use crate::semantic::SemanticAnalyzer;
+
+        let config = self.require_config()?;
+
+        // Phase 1: Semantic Analysis
+        let mut analyzer = SemanticAnalyzer::new(config.clone());
+        let semantic = analyzer.analyze(&self.ast, &self.parameters)?;
+        if !semantic.errors.is_empty() {
+            return Err(GraphError::PlanError {
+                message: format!("Semantic analysis failed:\n{}", semantic.errors.join("\n")),
+                location: snafu::Location::new(file!(), line!(), column!()),
+            });
+        }
+
+        // Phase 2: Graph Logical Plan
+        let mut logical_planner = LogicalPlanner::new(config);
+        let logical_plan = logical_planner.plan(&semantic.ast)?;
+
+        // Phase 3: Native DataFusion Logical Plan (with CSR extension nodes where possible)
+        let native = LanceNativePlanner::with_catalog(config.clone(), catalog);
+        native.plan(&logical_plan)
+    }
+
     /// Helper to build catalog and context from in-memory datasets
     async fn build_catalog_and_context_from_datasets(
         &self,
         datasets: HashMap<String, arrow::record_batch::RecordBatch>,
+        native: bool,
     ) -> Result<(
         lance_graph_catalog::InMemoryCatalog,
         datafusion::execution::context::SessionContext,
@@ -624,8 +698,19 @@ impl CypherQuery {
             });
         }
 
-        // Create session context and catalog
-        let ctx = SessionContext::new();
+        // Create session context — with CSR query planner when native=true
+        let ctx = if native {
+            use datafusion::execution::session_state::SessionStateBuilder;
+            let state = SessionStateBuilder::new()
+                .with_default_features()
+                .with_query_planner(Arc::new(
+                    crate::lance_native_planner::CsrQueryPlanner::new(),
+                ))
+                .build();
+            SessionContext::new_with_state(state)
+        } else {
+            SessionContext::new()
+        };
         let mut catalog = InMemoryCatalog::new();
 
         // Register all datasets as tables
diff --git a/crates/lance-graph/tests/test_lance_native_expand.rs b/crates/lance-graph/tests/test_lance_native_expand.rs
new file mode 100644
index 00000000..b8b88eda
--- /dev/null
+++ b/crates/lance-graph/tests/test_lance_native_expand.rs
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The Lance Authors
+
+//! End-to-end parity tests: native CSR expand vs DataFusion join path.
+
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use arrow_array::{Int64Array, RecordBatch, StringArray, UInt64Array};
+use arrow_schema::{DataType, Field, Schema};
+use lance_graph::config::GraphConfig;
+use lance_graph::query::{CypherQuery, ExecutionStrategy};
+
+fn person_batch() -> RecordBatch {
+    // Dense ids 0..=3 (row id == id_field value).
+    let schema = Arc::new(Schema::new(vec![
+        Field::new("id", DataType::UInt64, false),
+        Field::new("name", DataType::Utf8, false),
+        Field::new("age", DataType::Int64, false),
+    ]));
+    RecordBatch::try_new(
+        schema,
+        vec![
+            Arc::new(UInt64Array::from(vec![0u64, 1, 2, 3])),
+            Arc::new(StringArray::from(vec!["alice", "bob", "carol", "dave"])),
+            Arc::new(Int64Array::from(vec![30i64, 40, 25, 50])),
+        ],
+    )
+    .unwrap()
+}
+
+fn knows_batch() -> RecordBatch {
+    // 0->1, 0->2, 1->3, 2->3
+    let schema = Arc::new(Schema::new(vec![
+        Field::new("src_id", DataType::UInt64, false),
+        Field::new("dst_id", DataType::UInt64, false),
+    ]));
+    RecordBatch::try_new(
+        schema,
+        vec![
+            Arc::new(UInt64Array::from(vec![0u64, 0, 1, 2])),
+            Arc::new(UInt64Array::from(vec![1u64, 2, 3, 3])),
+        ],
+    )
+    .unwrap()
+}
+
+fn config() -> GraphConfig {
+    GraphConfig::builder()
+        .with_node_label("Person", "id")
+        .with_relationship("KNOWS", "src_id", "dst_id")
+        .build()
+        .unwrap()
+}
+
+fn datasets() -> HashMap<String, RecordBatch> {
+    let mut d = HashMap::new();
+    d.insert("Person".to_string(), person_batch());
+    d.insert("KNOWS".to_string(), knows_batch());
+    d
+}
+
+/// Collect (a.name, b.name) rows as a sorted Vec for order-independent compare.
+fn name_pairs(batch: &RecordBatch) -> Vec<(String, String)> {
+    let cols: Vec<&StringArray> = (0..batch.num_columns())
+        .map(|i| {
+            batch
+                .column(i)
+                .as_any()
+                .downcast_ref::<StringArray>()
+                .unwrap()
+        })
+        .collect();
+    let mut rows: Vec<(String, String)> = (0..batch.num_rows())
+        .map(|r| (cols[0].value(r).to_string(), cols[1].value(r).to_string()))
+        .collect();
+    rows.sort();
+    rows
+}
+
+#[tokio::test]
+async fn test_native_expand_matches_datafusion_names() {
+    let q = "MATCH (a:Person)-[:KNOWS]->(b:Person) RETURN a.name, b.name";
+    let query = CypherQuery::new(q).unwrap().with_config(config());
+
+    let native = query
+        .execute(datasets(), Some(ExecutionStrategy::LanceNative))
+        .await
+        .unwrap();
+    let df = query
+        .execute(datasets(), Some(ExecutionStrategy::DataFusion))
+        .await
+        .unwrap();
+
+    let expected = vec![
+        ("alice".to_string(), "bob".to_string()),
+        ("alice".to_string(), "carol".to_string()),
+        ("bob".to_string(), "dave".to_string()),
+        ("carol".to_string(), "dave".to_string()),
+    ];
+    assert_eq!(name_pairs(&native), expected);
+    assert_eq!(name_pairs(&native), name_pairs(&df));
+}
+
+#[tokio::test]
+async fn test_native_expand_with_target_filter() {
+    let q = "MATCH (a:Person)-[:KNOWS]->(b:Person) WHERE b.age > 30 RETURN a.name, b.name";
+    let query = CypherQuery::new(q).unwrap().with_config(config());
+
+    let native = query
+        .execute(datasets(), Some(ExecutionStrategy::LanceNative))
+        .await
+        .unwrap();
+    let df = query
+        .execute(datasets(), Some(ExecutionStrategy::DataFusion))
+        .await
+        .unwrap();
+    assert_eq!(name_pairs(&native), name_pairs(&df));
+    // bob(40) and dave(50) qualify as targets: (alice,bob),(bob,dave),(carol,dave)
+    assert_eq!(
+        name_pairs(&native),
+        vec![
+            ("alice".to_string(), "bob".to_string()),
+            ("bob".to_string(), "dave".to_string()),
+            ("carol".to_string(), "dave".to_string()),
+        ]
+    );
+}
+
+#[tokio::test]
+async fn test_native_expand_incoming_matches_datafusion() {
+    let q = "MATCH (a:Person)<-[:KNOWS]-(b:Person) RETURN a.name, b.name";
+    let query = CypherQuery::new(q).unwrap().with_config(config());
+    let native = query
+        .execute(datasets(), Some(ExecutionStrategy::LanceNative))
+        .await
+        .unwrap();
+    let df = query
+        .execute(datasets(), Some(ExecutionStrategy::DataFusion))
+        .await
+        .unwrap();
+    assert_eq!(name_pairs(&native), name_pairs(&df));
+}
+
+#[tokio::test]
+async fn test_native_varlength_falls_back_and_matches() {
+    // Variable-length path is unsupported natively; LanceNative must fall back
+    // and produce the same result as DataFusion.
+    let q = "MATCH (a:Person)-[:KNOWS*1..2]->(b:Person) RETURN a.name, b.name";
+    let query = CypherQuery::new(q).unwrap().with_config(config());
+    let native = query
+        .execute(datasets(), Some(ExecutionStrategy::LanceNative))
+        .await
+        .unwrap();
+    let df = query
+        .execute(datasets(), Some(ExecutionStrategy::DataFusion))
+        .await
+        .unwrap();
+    assert_eq!(name_pairs(&native), name_pairs(&df));
+}

From 033a1329fc301d887d0da513a8669cdff0c6c471 Mon Sep 17 00:00:00 2001
From: "jianjian.xie" <jja725@gmail.com>
Date: Mon, 22 Jun 2026 23:20:28 -0700
Subject: [PATCH 10/10] test(csr): add error-path tests and doc comment for
 add_edges_from_batch_with_columns

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 crates/lance-graph/src/csr_index.rs | 41 ++++++++++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/crates/lance-graph/src/csr_index.rs b/crates/lance-graph/src/csr_index.rs
index 1d3d7200..526a38e3 100644
--- a/crates/lance-graph/src/csr_index.rs
+++ b/crates/lance-graph/src/csr_index.rs
@@ -236,7 +236,8 @@ impl CsrIndexBuilder {
         self
     }
 
-    /// Add edges from an Arrow RecordBatch with `src_id` and `dst_id` columns.
+    /// Add edges from an Arrow RecordBatch. Expects columns named `src_id` and
+    /// `dst_id`; use [`Self::add_edges_from_batch_with_columns`] for other names.
     pub fn add_edges_from_batch(self, batch: &RecordBatch) -> Result<Self> {
         self.add_edges_from_batch_with_columns(batch, "src_id", "dst_id")
     }
@@ -693,5 +694,43 @@ mod tests {
             .build();
         assert_eq!(rev.neighbors(2), &[0, 1]);
         assert_eq!(rev.neighbors(1), &[0]);
+        assert_eq!(rev.neighbors(0), &[] as &[u64]);
+    }
+
+    #[test]
+    fn test_add_edges_from_batch_with_columns_errors() {
+        // Missing column name -> error
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("src_person_id", DataType::UInt64, false),
+            Field::new("dst_person_id", DataType::UInt64, false),
+        ]));
+        let batch = RecordBatch::try_new(
+            schema,
+            vec![
+                Arc::new(UInt64Array::from(vec![0u64])),
+                Arc::new(UInt64Array::from(vec![1u64])),
+            ],
+        )
+        .unwrap();
+        assert!(CsrIndexBuilder::new()
+            .add_edges_from_batch_with_columns(&batch, "missing", "dst_person_id")
+            .is_err());
+
+        // Wrong column type (Int64 instead of UInt64) -> error
+        let schema2 = Arc::new(Schema::new(vec![
+            Field::new("src_person_id", DataType::Int64, false),
+            Field::new("dst_person_id", DataType::UInt64, false),
+        ]));
+        let batch2 = RecordBatch::try_new(
+            schema2,
+            vec![
+                Arc::new(arrow_array::Int64Array::from(vec![0i64])),
+                Arc::new(UInt64Array::from(vec![1u64])),
+            ],
+        )
+        .unwrap();
+        assert!(CsrIndexBuilder::new()
+            .add_edges_from_batch_with_columns(&batch2, "src_person_id", "dst_person_id")
+            .is_err());
     }
 }