Skip to content

Commit bfeb15f

Browse files
committed
Merge branch 'main' of github.com:apache/datafusion-python into nick/docstrings-scalar-string
2 parents d1a31a7 + 93f4c34 commit bfeb15f

File tree

118 files changed

+1362
-3521
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

118 files changed

+1362
-3521
lines changed

.cargo/config.toml

Lines changed: 0 additions & 5 deletions
This file was deleted.

.github/workflows/test.yml

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -41,18 +41,6 @@ jobs:
4141
steps:
4242
- uses: actions/checkout@v6
4343

44-
- name: Verify example datafusion version
45-
run: |
46-
MAIN_VERSION=$(grep -A 1 "name = \"datafusion-common\"" Cargo.lock | grep "version = " | head -1 | sed 's/.*version = "\(.*\)"/\1/')
47-
EXAMPLE_VERSION=$(grep -A 1 "name = \"datafusion-common\"" examples/datafusion-ffi-example/Cargo.lock | grep "version = " | head -1 | sed 's/.*version = "\(.*\)"/\1/')
48-
echo "Main crate datafusion version: $MAIN_VERSION"
49-
echo "FFI example datafusion version: $EXAMPLE_VERSION"
50-
51-
if [ "$MAIN_VERSION" != "$EXAMPLE_VERSION" ]; then
52-
echo "❌ Error: FFI example datafusion versions don't match!"
53-
exit 1
54-
fi
55-
5644
- name: Setup Python
5745
uses: actions/setup-python@v6
5846
with:

Cargo.lock

Lines changed: 33 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 29 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717

18-
[package]
19-
name = "datafusion-python"
18+
[workspace.package]
2019
version = "52.0.0"
2120
homepage = "https://datafusion.apache.org/python"
2221
repository = "https://github.com/apache/datafusion-python"
@@ -26,67 +25,43 @@ readme = "README.md"
2625
license = "Apache-2.0"
2726
edition = "2024"
2827
rust-version = "1.88"
29-
include = [
30-
"/src",
31-
"/datafusion",
32-
"/LICENSE.txt",
33-
"build.rs",
34-
"pyproject.toml",
35-
"Cargo.toml",
36-
"Cargo.lock",
37-
]
3828

39-
[features]
40-
default = ["mimalloc"]
41-
protoc = ["datafusion-substrait/protoc"]
42-
substrait = ["dep:datafusion-substrait"]
29+
[workspace]
30+
members = ["crates/core", "crates/util", "examples/datafusion-ffi-example"]
31+
resolver = "3"
4332

44-
[dependencies]
45-
tokio = { version = "1.49", features = [
46-
"macros",
47-
"rt",
48-
"rt-multi-thread",
49-
"sync",
50-
] }
51-
pyo3 = { version = "0.28", features = [
52-
"extension-module",
53-
"abi3",
54-
"abi3-py310",
55-
] }
56-
pyo3-async-runtimes = { version = "0.28", features = ["tokio-runtime"] }
33+
[workspace.dependencies]
34+
tokio = { version = "1.49" }
35+
pyo3 = { version = "0.28" }
36+
pyo3-async-runtimes = { version = "0.28" }
5737
pyo3-log = "0.13.3"
58-
arrow = { version = "58", features = ["pyarrow"] }
38+
arrow = { version = "58" }
39+
arrow-array = { version = "58" }
40+
arrow-schema = { version = "58" }
5941
arrow-select = { version = "58" }
60-
datafusion = { version = "53", features = ["avro", "unicode_expressions"] }
61-
datafusion-substrait = { version = "53", optional = true }
42+
datafusion = { version = "53" }
43+
datafusion-substrait = { version = "53" }
6244
datafusion-proto = { version = "53" }
6345
datafusion-ffi = { version = "53" }
64-
prost = "0.14.3" # keep in line with `datafusion-substrait`
46+
datafusion-catalog = { version = "53", default-features = false }
47+
datafusion-common = { version = "53", default-features = false }
48+
datafusion-functions-aggregate = { version = "53" }
49+
datafusion-functions-window = { version = "53" }
50+
datafusion-expr = { version = "53" }
51+
prost = "0.14.3" # keep in line with `datafusion-substrait`
6552
serde_json = "1"
66-
uuid = { version = "1.21", features = ["v4"] }
67-
mimalloc = { version = "0.1", optional = true, default-features = false, features = [
68-
"local_dynamic_tls",
69-
] }
53+
uuid = { version = "1.21" }
54+
mimalloc = { version = "0.1", default-features = false }
7055
async-trait = "0.1.89"
7156
futures = "0.3"
7257
cstr = "0.2"
73-
object_store = { version = "0.13.1", features = [
74-
"aws",
75-
"gcp",
76-
"azure",
77-
"http",
78-
] }
58+
object_store = { version = "0.13.1" }
7959
url = "2"
8060
log = "0.4.29"
8161
parking_lot = "0.12"
82-
83-
[build-dependencies]
84-
prost-types = "0.14.3" # keep in line with `datafusion-substrait`
62+
prost-types = "0.14.3" # keep in line with `datafusion-substrait`
8563
pyo3-build-config = "0.28"
86-
87-
[lib]
88-
name = "datafusion_python"
89-
crate-type = ["cdylib", "rlib"]
64+
datafusion-python-util = { path = "crates/util" }
9065

9166
[profile.release]
9267
lto = true
@@ -99,3 +74,8 @@ datafusion = { git = "https://github.com/apache/datafusion.git", rev = "35749607
9974
datafusion-substrait = { git = "https://github.com/apache/datafusion.git", rev = "35749607f585b3bf25b66b7d2289c56c18d03e4f" }
10075
datafusion-proto = { git = "https://github.com/apache/datafusion.git", rev = "35749607f585b3bf25b66b7d2289c56c18d03e4f" }
10176
datafusion-ffi = { git = "https://github.com/apache/datafusion.git", rev = "35749607f585b3bf25b66b7d2289c56c18d03e4f" }
77+
datafusion-catalog = { git = "https://github.com/apache/datafusion.git", rev = "35749607f585b3bf25b66b7d2289c56c18d03e4f" }
78+
datafusion-common = { git = "https://github.com/apache/datafusion.git", rev = "35749607f585b3bf25b66b7d2289c56c18d03e4f" }
79+
datafusion-functions-aggregate = { git = "https://github.com/apache/datafusion.git", rev = "35749607f585b3bf25b66b7d2289c56c18d03e4f" }
80+
datafusion-functions-window = { git = "https://github.com/apache/datafusion.git", rev = "35749607f585b3bf25b66b7d2289c56c18d03e4f" }
81+
datafusion-expr = { git = "https://github.com/apache/datafusion.git", rev = "35749607f585b3bf25b66b7d2289c56c18d03e4f" }

README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,15 @@ uv run --no-project maturin develop --uv
278278
uv run --no-project pytest .
279279
```
280280

281+
To run the FFI tests within the examples folder, after you have built
282+
`datafusion-python` with the previous commands:
283+
284+
```bash
285+
cd examples/datafusion-ffi-example
286+
uv run --no-project maturin develop --uv
287+
uv run --no-project pytest python/tests/_test_*py
288+
```
289+
281290
### Running & Installing pre-commit hooks
282291

283292
`datafusion-python` takes advantage of [pre-commit](https://pre-commit.com/) to assist developers with code linting to help reduce

crates/core/Cargo.toml

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
[package]
19+
name = "datafusion-python"
20+
version.workspace = true
21+
edition.workspace = true
22+
rust-version.workspace = true
23+
license.workspace = true
24+
description.workspace = true
25+
homepage.workspace = true
26+
repository.workspace = true
27+
include = [
28+
"src",
29+
"../LICENSE.txt",
30+
"build.rs",
31+
"../pyproject.toml",
32+
"Cargo.toml",
33+
"../Cargo.lock",
34+
]
35+
36+
[dependencies]
37+
tokio = { workspace = true, features = [
38+
"macros",
39+
"rt",
40+
"rt-multi-thread",
41+
"sync",
42+
] }
43+
pyo3 = { workspace = true, features = [
44+
"extension-module",
45+
"abi3",
46+
"abi3-py310",
47+
] }
48+
pyo3-async-runtimes = { workspace = true, features = ["tokio-runtime"] }
49+
pyo3-log = { workspace = true }
50+
arrow = { workspace = true, features = ["pyarrow"] }
51+
arrow-select = { workspace = true }
52+
datafusion = { workspace = true, features = ["avro", "unicode_expressions"] }
53+
datafusion-substrait = { workspace = true, optional = true }
54+
datafusion-proto = { workspace = true }
55+
datafusion-ffi = { workspace = true }
56+
prost = { workspace = true } # keep in line with `datafusion-substrait`
57+
serde_json = { workspace = true }
58+
uuid = { workspace = true, features = ["v4"] }
59+
mimalloc = { workspace = true, optional = true, features = [
60+
"local_dynamic_tls",
61+
] }
62+
async-trait = { workspace = true }
63+
futures = { workspace = true }
64+
cstr = { workspace = true }
65+
object_store = { workspace = true, features = ["aws", "gcp", "azure", "http"] }
66+
url = { workspace = true }
67+
log = { workspace = true }
68+
parking_lot = { workspace = true }
69+
datafusion-python-util = { workspace = true }
70+
71+
[build-dependencies]
72+
prost-types = { workspace = true }
73+
pyo3-build-config = { workspace = true }
74+
75+
[features]
76+
default = ["mimalloc"]
77+
protoc = ["datafusion-substrait/protoc"]
78+
substrait = ["dep:datafusion-substrait"]
79+
80+
[lib]
81+
name = "datafusion_python"
82+
crate-type = ["cdylib", "rlib"]
File renamed without changes.

src/array.rs renamed to crates/core/src/array.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,13 @@ use arrow::array::{Array, ArrayRef};
2222
use arrow::datatypes::{Field, FieldRef};
2323
use arrow::ffi::{FFI_ArrowArray, FFI_ArrowSchema};
2424
use arrow::pyarrow::ToPyArrow;
25+
use datafusion_python_util::validate_pycapsule;
2526
use pyo3::ffi::c_str;
2627
use pyo3::prelude::{PyAnyMethods, PyCapsuleMethods};
2728
use pyo3::types::PyCapsule;
2829
use pyo3::{Bound, PyAny, PyResult, Python, pyclass, pymethods};
2930

3031
use crate::errors::PyDataFusionResult;
31-
use crate::utils::validate_pycapsule;
3232

3333
/// A Python object which implements the Arrow PyCapsule for importing
3434
/// into other libraries.

src/catalog.rs renamed to crates/core/src/catalog.rs

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,19 +30,20 @@ use datafusion::datasource::TableProvider;
3030
use datafusion_ffi::catalog_provider::FFI_CatalogProvider;
3131
use datafusion_ffi::proto::logical_extension_codec::FFI_LogicalExtensionCodec;
3232
use datafusion_ffi::schema_provider::FFI_SchemaProvider;
33+
use datafusion_python_util::{
34+
create_logical_extension_capsule, ffi_logical_codec_from_pycapsule, validate_pycapsule,
35+
wait_for_future,
36+
};
3337
use pyo3::IntoPyObjectExt;
3438
use pyo3::exceptions::PyKeyError;
3539
use pyo3::ffi::c_str;
3640
use pyo3::prelude::*;
3741
use pyo3::types::PyCapsule;
3842

43+
use crate::context::PySessionContext;
3944
use crate::dataset::Dataset;
4045
use crate::errors::{PyDataFusionError, PyDataFusionResult, py_datafusion_err, to_datafusion_err};
4146
use crate::table::PyTable;
42-
use crate::utils::{
43-
create_logical_extension_capsule, extract_logical_extension_codec, validate_pycapsule,
44-
wait_for_future,
45-
};
4647

4748
#[pyclass(
4849
from_py_object,
@@ -710,6 +711,17 @@ fn extract_schema_provider_from_pyobj(
710711
Ok(provider)
711712
}
712713

714+
fn extract_logical_extension_codec(
715+
py: Python,
716+
obj: Option<Bound<PyAny>>,
717+
) -> PyResult<Arc<FFI_LogicalExtensionCodec>> {
718+
let obj = match obj {
719+
Some(obj) => obj,
720+
None => PySessionContext::global_ctx()?.into_bound_py_any(py)?,
721+
};
722+
ffi_logical_codec_from_pycapsule(obj).map(Arc::new)
723+
}
724+
713725
pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
714726
m.add_class::<PyCatalog>()?;
715727
m.add_class::<PySchema>()?;
File renamed without changes.

0 commit comments

Comments
 (0)