Skip to content

Commit 5c8994e

Browse files
authored
Merge branch 'main' into datafusion_53
2 parents 30e393d + 231ed2b commit 5c8994e

File tree

6 files changed

+86
-8
lines changed

6 files changed

+86
-8
lines changed

.github/workflows/build.yml

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -98,6 +98,15 @@ jobs:
9898
- name: Check Cargo.toml formatting
9999
run: taplo format --check
100100

101+
# Release-only guard job: runs dev/check_crates_patch.py, which exits non-zero when a Cargo.toml still has [patch.crates-io] entries.
check-crates-patch:
102+
if: inputs.build_mode == 'release'
103+
runs-on: ubuntu-latest
104+
steps:
105+
- uses: actions/checkout@v6
106+
107+
- name: Ensure [patch.crates-io] is empty
108+
run: python3 dev/check_crates_patch.py
109+
101110
generate-license:
102111
runs-on: ubuntu-latest
103112
steps:

.github/workflows/test.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ jobs:
6262
uses: actions/cache@v5
6363
with:
6464
path: ~/.cargo
65-
key: cargo-cache-${{ steps.rust-toolchain.outputs.cachekey }}-${{ hashFiles('Cargo.lock') }}
65+
key: cargo-cache-${{ matrix.toolchain }}-${{ hashFiles('Cargo.lock') }}
6666

6767
- name: Install dependencies
6868
uses: astral-sh/setup-uv@v7
@@ -106,7 +106,7 @@ jobs:
106106
RUST_BACKTRACE: 1
107107
run: |
108108
git submodule update --init
109-
uv run --no-project pytest -v . --import-mode=importlib
109+
uv run --no-project pytest -v --import-mode=importlib
110110
111111
- name: FFI unit tests
112112
run: |

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,8 @@ crate-type = ["cdylib", "rlib"]
9292
lto = true
9393
codegen-units = 1
9494

95-
# TODO: remove when datafusion-53 is released
95+
# We cannot publish to crates.io with any patches in the section below. Developers
96+
# must remove any entries in this section before creating a release candidate.
9697
[patch.crates-io]
9798
datafusion = { git = "https://github.com/apache/datafusion.git", rev = "518560246e87d489eba6d511fa167aa429b06728" }
9899
datafusion-substrait = { git = "https://github.com/apache/datafusion.git", rev = "518560246e87d489eba6d511fa167aa429b06728" }

dev/check_crates_patch.py

Lines changed: 61 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,61 @@
1+
#!/usr/bin/env python3
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing,
13+
# software distributed under the License is distributed on an
14+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
# KIND, either express or implied. See the License for the
16+
# specific language governing permissions and limitations
17+
# under the License.
18+
19+
"""Check that no Cargo.toml files contain [patch.crates-io] entries.
20+
21+
Release builds must not depend on patched crates. During development it is
22+
common to temporarily patch crates-io dependencies, but those patches must
23+
be removed before creating a release.
24+
25+
An empty [patch.crates-io] section is allowed.
26+
"""
27+
28+
import sys
29+
from pathlib import Path
30+
31+
import tomllib
32+
33+
34+
def main() -> int:
35+
errors: list[str] = []
36+
for cargo_toml in sorted(Path().rglob("Cargo.toml")):
37+
if "target" in cargo_toml.parts:
38+
continue
39+
with Path.open(cargo_toml, "rb") as f:
40+
data = tomllib.load(f)
41+
patch = data.get("patch", {}).get("crates-io", {})
42+
if patch:
43+
errors.append(str(cargo_toml))
44+
for name, spec in patch.items():
45+
errors.append(f" {name} = {spec}")
46+
47+
if errors:
48+
print("ERROR: Release builds must not contain [patch.crates-io] entries.")
49+
print()
50+
for line in errors:
51+
print(line)
52+
print()
53+
print("Remove all [patch.crates-io] entries before creating a release.")
54+
return 1
55+
56+
print("OK: No [patch.crates-io] entries found.")
57+
return 0
58+
59+
60+
if __name__ == "__main__":
61+
sys.exit(main())

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -70,6 +70,9 @@ features = ["substrait"]
7070
[tool.pytest.ini_options]
7171
asyncio_mode = "auto"
7272
asyncio_default_fixture_loop_scope = "function"
73+
addopts = "--doctest-modules"
74+
doctest_optionflags = ["NORMALIZE_WHITESPACE", "ELLIPSIS"]
75+
testpaths = ["python/tests", "python/datafusion"]
7376

7477
# Enable docstring linting using the google style guide
7578
[tool.ruff.lint]

python/datafusion/dataframe.py

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -327,8 +327,9 @@ def into_view(self, temporary: bool = False) -> Table:
327327
>>> df = ctx.sql("SELECT 1 AS value")
328328
>>> view = df.into_view()
329329
>>> ctx.register_table("values_view", view)
330-
>>> df.collect() # The DataFrame is still usable
331-
>>> ctx.sql("SELECT value FROM values_view").collect()
330+
>>> result = ctx.sql("SELECT value FROM values_view").collect()
331+
>>> result[0].column("value").to_pylist()
332+
[1]
332333
"""
333334
from datafusion.catalog import Table as _Table
334335

@@ -1379,9 +1380,12 @@ def fill_null(self, value: Any, subset: list[str] | None = None) -> DataFrame:
13791380
DataFrame with null values replaced where type casting is possible
13801381
13811382
Examples:
1382-
>>> df = df.fill_null(0) # Fill all nulls with 0 where possible
1383-
>>> # Fill nulls in specific string columns
1384-
>>> df = df.fill_null("missing", subset=["name", "category"])
1383+
>>> from datafusion import SessionContext, col
1384+
>>> ctx = SessionContext()
1385+
>>> df = ctx.from_pydict({"a": [1, None, 3], "b": [None, 5, 6]})
1386+
>>> filled = df.fill_null(0)
1387+
>>> filled.sort(col("a")).collect()[0].column("a").to_pylist()
1388+
[0, 1, 3]
13851389
13861390
Notes:
13871391
- Only fills nulls in columns where the value can be cast to the column type

0 commit comments

Comments
 (0)