Skip to content

Commit 548cddb

Browse files
ntjohnson1 and claude committed
Add docstring examples for aggregate and window functions
Add example usage to the docstrings of the aggregate and window functions to improve documentation.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 1160d5a commit 548cddb

File tree

1 file changed

+104
-0
lines changed

1 file changed

+104
-0
lines changed

python/datafusion/functions.py

Lines changed: 104 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2516,6 +2516,14 @@ def first_value(
25162516
For example::
25172517
25182518
df.aggregate([], first_value(col("a"), order_by="ts"))
2519+
2520+
Examples:
2521+
---------
2522+
>>> ctx = dfn.SessionContext()
2523+
>>> df = ctx.from_pydict({"a": [10, 20, 30]})
2524+
>>> result = df.aggregate([], [dfn.functions.first_value(dfn.col("a")).alias("v")])
2525+
>>> result.collect_column("v")[0].as_py()
2526+
10
25192527
"""
25202528
order_by_raw = sort_list_to_raw_sort_list(order_by)
25212529
filter_raw = filter.expr if filter is not None else None
@@ -2553,6 +2561,14 @@ def last_value(
25532561
For example::
25542562
25552563
df.aggregate([], last_value(col("a"), order_by="ts"))
2564+
2565+
Examples:
2566+
---------
2567+
>>> ctx = dfn.SessionContext()
2568+
>>> df = ctx.from_pydict({"a": [10, 20, 30]})
2569+
>>> result = df.aggregate([], [dfn.functions.last_value(dfn.col("a")).alias("v")])
2570+
>>> result.collect_column("v")[0].as_py()
2571+
30
25562572
"""
25572573
order_by_raw = sort_list_to_raw_sort_list(order_by)
25582574
filter_raw = filter.expr if filter is not None else None
@@ -2592,6 +2608,14 @@ def nth_value(
25922608
For example::
25932609
25942610
df.aggregate([], nth_value(col("a"), 2, order_by="ts"))
2611+
2612+
Examples:
2613+
---------
2614+
>>> ctx = dfn.SessionContext()
2615+
>>> df = ctx.from_pydict({"a": [10, 20, 30]})
2616+
>>> result = df.aggregate([], [dfn.functions.nth_value(dfn.col("a"), 2).alias("v")])
2617+
>>> result.collect_column("v")[0].as_py()
2618+
20
25952619
"""
25962620
order_by_raw = sort_list_to_raw_sort_list(order_by)
25972621
filter_raw = filter.expr if filter is not None else None
@@ -2732,6 +2756,16 @@ def lead(
27322756
For example::
27332757
27342758
lead(col("b"), order_by="ts")
2759+
2760+
Examples:
2761+
---------
2762+
>>> ctx = dfn.SessionContext()
2763+
>>> df = ctx.from_pydict({"a": [1, 2, 3]})
2764+
>>> result = df.select(
2765+
... dfn.col("a"), dfn.functions.lead(dfn.col("a"), shift_offset=1,
2766+
... default_value=0, order_by="a").alias("lead"))
2767+
>>> result.sort(dfn.col("a")).collect_column("lead").to_pylist()
2768+
[2, 3, 0]
27352769
"""
27362770
if not isinstance(default_value, pa.Scalar) and default_value is not None:
27372771
default_value = pa.scalar(default_value)
@@ -2787,6 +2821,16 @@ def lag(
27872821
For example::
27882822
27892823
lag(col("b"), order_by="ts")
2824+
2825+
Examples:
2826+
---------
2827+
>>> ctx = dfn.SessionContext()
2828+
>>> df = ctx.from_pydict({"a": [1, 2, 3]})
2829+
>>> result = df.select(
2830+
... dfn.col("a"), dfn.functions.lag(dfn.col("a"), shift_offset=1,
2831+
... default_value=0, order_by="a").alias("lag"))
2832+
>>> result.sort(dfn.col("a")).collect_column("lag").to_pylist()
2833+
[0, 1, 2]
27902834
"""
27912835
if not isinstance(default_value, pa.Scalar):
27922836
default_value = pa.scalar(default_value)
@@ -2832,6 +2876,15 @@ def row_number(
28322876
For example::
28332877
28342878
row_number(order_by="points")
2879+
2880+
Examples:
2881+
---------
2882+
>>> ctx = dfn.SessionContext()
2883+
>>> df = ctx.from_pydict({"a": [10, 20, 30]})
2884+
>>> result = df.select(
2885+
... dfn.col("a"), dfn.functions.row_number(order_by="a").alias("rn"))
2886+
>>> result.sort(dfn.col("a")).collect_column("rn").to_pylist()
2887+
[1, 2, 3]
28352888
"""
28362889
partition_by_raw = expr_list_to_raw_expr_list(partition_by)
28372890
order_by_raw = sort_list_to_raw_sort_list(order_by)
@@ -2876,6 +2929,14 @@ def rank(
28762929
For example::
28772930
28782931
rank(order_by="points")
2932+
2933+
Examples:
2934+
---------
2935+
>>> ctx = dfn.SessionContext()
2936+
>>> df = ctx.from_pydict({"a": [10, 10, 20]})
2937+
>>> result = df.select(dfn.col("a"), dfn.functions.rank(order_by="a").alias("rnk"))
2938+
>>> result.sort(dfn.col("a")).collect_column("rnk").to_pylist()
2939+
[1, 1, 3]
28792940
"""
28802941
partition_by_raw = expr_list_to_raw_expr_list(partition_by)
28812942
order_by_raw = sort_list_to_raw_sort_list(order_by)
@@ -2915,6 +2976,15 @@ def dense_rank(
29152976
For example::
29162977
29172978
dense_rank(order_by="points")
2979+
2980+
Examples:
2981+
---------
2982+
>>> ctx = dfn.SessionContext()
2983+
>>> df = ctx.from_pydict({"a": [10, 10, 20]})
2984+
>>> result = df.select(
2985+
... dfn.col("a"), dfn.functions.dense_rank(order_by="a").alias("dr"))
2986+
>>> result.sort(dfn.col("a")).collect_column("dr").to_pylist()
2987+
[1, 1, 2]
29182988
"""
29192989
partition_by_raw = expr_list_to_raw_expr_list(partition_by)
29202990
order_by_raw = sort_list_to_raw_sort_list(order_by)
@@ -2955,6 +3025,15 @@ def percent_rank(
29553025
For example::
29563026
29573027
percent_rank(order_by="points")
3028+
3029+
Examples:
3030+
---------
3031+
>>> ctx = dfn.SessionContext()
3032+
>>> df = ctx.from_pydict({"a": [10, 20, 30]})
3033+
>>> result = df.select(
3034+
... dfn.col("a"), dfn.functions.percent_rank(order_by="a").alias("pr"))
3035+
>>> result.sort(dfn.col("a")).collect_column("pr").to_pylist()
3036+
[0.0, 0.5, 1.0]
29583037
"""
29593038
partition_by_raw = expr_list_to_raw_expr_list(partition_by)
29603039
order_by_raw = sort_list_to_raw_sort_list(order_by)
@@ -2995,6 +3074,22 @@ def cume_dist(
29953074
For example::
29963075
29973076
cume_dist(order_by="points")
3077+
3078+
Examples:
3079+
---------
3080+
>>> ctx = dfn.SessionContext()
3081+
>>> df = ctx.from_pydict({"a": [10, 10, 20]})
3082+
>>> import builtins
3083+
>>> result = df.select(
3084+
... dfn.col("a"),
3085+
... dfn.functions.cume_dist(
3086+
... order_by="a"
3087+
... ).alias("cd")
3088+
... )
3089+
>>> [builtins.round(x, 4) for x in
3090+
... result.sort(dfn.col("a")
3091+
... ).collect_column("cd").to_pylist()]
3092+
[0.6667, 0.6667, 1.0]
29983093
"""
29993094
partition_by_raw = expr_list_to_raw_expr_list(partition_by)
30003095
order_by_raw = sort_list_to_raw_sort_list(order_by)
@@ -3039,6 +3134,15 @@ def ntile(
30393134
For example::
30403135
30413136
ntile(3, order_by="points")
3137+
3138+
Examples:
3139+
---------
3140+
>>> ctx = dfn.SessionContext()
3141+
>>> df = ctx.from_pydict({"a": [10, 20, 30, 40]})
3142+
>>> result = df.select(
3143+
... dfn.col("a"), dfn.functions.ntile(2, order_by="a").alias("nt"))
3144+
>>> result.sort(dfn.col("a")).collect_column("nt").to_pylist()
3145+
[1, 1, 2, 2]
30423146
"""
30433147
partition_by_raw = expr_list_to_raw_expr_list(partition_by)
30443148
order_by_raw = sort_list_to_raw_sort_list(order_by)

0 commit comments

Comments
 (0)