Skip to content

Commit d12b8ce

Browse files
committed
Cover optional argument examples for window and value functions
1 parent 92dabde commit d12b8ce

File tree

1 file changed

+113
-0
lines changed

1 file changed

+113
-0
lines changed

python/datafusion/functions.py

Lines changed: 113 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4227,6 +4227,17 @@ def first_value(
42274227
... )
42284228
>>> result.collect_column("v")[0].as_py()
42294229
10
4230+
4231+
>>> df = ctx.from_pydict({"a": [None, 20, 10]})
4232+
>>> result = df.aggregate(
4233+
... [], [dfn.functions.first_value(
4234+
... dfn.col("a"),
4235+
... filter=dfn.col("a") > dfn.lit(10),
4236+
... order_by="a",
4237+
... null_treatment=dfn.common.NullTreatment.IGNORE_NULLS,
4238+
... ).alias("v")])
4239+
>>> result.collect_column("v")[0].as_py()
4240+
20
42304241
"""
42314242
order_by_raw = sort_list_to_raw_sort_list(order_by)
42324243
filter_raw = filter.expr if filter is not None else None
@@ -4269,6 +4280,17 @@ def last_value(
42694280
... )
42704281
>>> result.collect_column("v")[0].as_py()
42714282
30
4283+
4284+
>>> df = ctx.from_pydict({"a": [None, 20, 10]})
4285+
>>> result = df.aggregate(
4286+
... [], [dfn.functions.last_value(
4287+
... dfn.col("a"),
4288+
... filter=dfn.col("a") > dfn.lit(10),
4289+
... order_by="a",
4290+
... null_treatment=dfn.common.NullTreatment.IGNORE_NULLS,
4291+
... ).alias("v")])
4292+
>>> result.collect_column("v")[0].as_py()
4293+
20
42724294
"""
42734295
order_by_raw = sort_list_to_raw_sort_list(order_by)
42744296
filter_raw = filter.expr if filter is not None else None
@@ -4313,6 +4335,17 @@ def nth_value(
43134335
... )
43144336
>>> result.collect_column("v")[0].as_py()
43154337
20
4338+
4339+
>>> df = ctx.from_pydict({"a": [None, 20, 10]})
4340+
>>> result = df.aggregate(
4341+
... [], [dfn.functions.nth_value(
4342+
... dfn.col("a"), 1,
4343+
... filter=dfn.col("a") > dfn.lit(10),
4344+
... order_by="a",
4345+
... null_treatment=dfn.common.NullTreatment.IGNORE_NULLS,
4346+
... ).alias("v")])
4347+
>>> result.collect_column("v")[0].as_py()
4348+
20
43164349
"""
43174350
order_by_raw = sort_list_to_raw_sort_list(order_by)
43184351
filter_raw = filter.expr if filter is not None else None
@@ -4531,6 +4564,16 @@ def lead(
45314564
... default_value=0, order_by="a").alias("lead"))
45324565
>>> result.sort(dfn.col("a")).collect_column("lead").to_pylist()
45334566
[2, 3, 0]
4567+
4568+
>>> df = ctx.from_pydict({"g": ["a", "a", "b"], "v": [1, 2, 3]})
4569+
>>> result = df.select(
4570+
... dfn.col("g"), dfn.col("v"),
4571+
... dfn.functions.lead(
4572+
... dfn.col("v"), shift_offset=1, default_value=0,
4573+
... partition_by=dfn.col("g"), order_by="v",
4574+
... ).alias("lead"))
4575+
>>> result.sort(dfn.col("g"), dfn.col("v")).collect_column("lead").to_pylist()
4576+
[2, 0, 0]
45344577
"""
45354578
if not isinstance(default_value, pa.Scalar) and default_value is not None:
45364579
default_value = pa.scalar(default_value)
@@ -4591,6 +4634,16 @@ def lag(
45914634
... default_value=0, order_by="a").alias("lag"))
45924635
>>> result.sort(dfn.col("a")).collect_column("lag").to_pylist()
45934636
[0, 1, 2]
4637+
4638+
>>> df = ctx.from_pydict({"g": ["a", "a", "b"], "v": [1, 2, 3]})
4639+
>>> result = df.select(
4640+
... dfn.col("g"), dfn.col("v"),
4641+
... dfn.functions.lag(
4642+
... dfn.col("v"), shift_offset=1, default_value=0,
4643+
... partition_by=dfn.col("g"), order_by="v",
4644+
... ).alias("lag"))
4645+
>>> result.sort(dfn.col("g"), dfn.col("v")).collect_column("lag").to_pylist()
4646+
[0, 1, 0]
45944647
"""
45954648
if not isinstance(default_value, pa.Scalar):
45964649
default_value = pa.scalar(default_value)
@@ -4640,6 +4693,16 @@ def row_number(
46404693
... dfn.col("a"), dfn.functions.row_number(order_by="a").alias("rn"))
46414694
>>> result.sort(dfn.col("a")).collect_column("rn").to_pylist()
46424695
[1, 2, 3]
4696+
4697+
>>> df = ctx.from_pydict(
4698+
... {"g": ["a", "a", "b", "b"], "v": [1, 2, 3, 4]})
4699+
>>> result = df.select(
4700+
... dfn.col("g"), dfn.col("v"),
4701+
... dfn.functions.row_number(
4702+
... partition_by=dfn.col("g"), order_by="v",
4703+
... ).alias("rn"))
4704+
>>> result.sort(dfn.col("g"), dfn.col("v")).collect_column("rn").to_pylist()
4705+
[1, 2, 1, 2]
46434706
"""
46444707
partition_by_raw = expr_list_to_raw_expr_list(partition_by)
46454708
order_by_raw = sort_list_to_raw_sort_list(order_by)
@@ -4689,6 +4752,16 @@ def rank(
46894752
... )
46904753
>>> result.sort(dfn.col("a")).collect_column("rnk").to_pylist()
46914754
[1, 1, 3]
4755+
4756+
>>> df = ctx.from_pydict(
4757+
... {"g": ["a", "a", "b", "b"], "v": [1, 1, 2, 3]})
4758+
>>> result = df.select(
4759+
... dfn.col("g"), dfn.col("v"),
4760+
... dfn.functions.rank(
4761+
... partition_by=dfn.col("g"), order_by="v",
4762+
... ).alias("rnk"))
4763+
>>> result.sort(dfn.col("g"), dfn.col("v")).collect_column("rnk").to_pylist()
4764+
[1, 1, 1, 2]
46924765
"""
46934766
partition_by_raw = expr_list_to_raw_expr_list(partition_by)
46944767
order_by_raw = sort_list_to_raw_sort_list(order_by)
@@ -4732,6 +4805,16 @@ def dense_rank(
47324805
... dfn.col("a"), dfn.functions.dense_rank(order_by="a").alias("dr"))
47334806
>>> result.sort(dfn.col("a")).collect_column("dr").to_pylist()
47344807
[1, 1, 2]
4808+
4809+
>>> df = ctx.from_pydict(
4810+
... {"g": ["a", "a", "b", "b"], "v": [1, 1, 2, 3]})
4811+
>>> result = df.select(
4812+
... dfn.col("g"), dfn.col("v"),
4813+
... dfn.functions.dense_rank(
4814+
... partition_by=dfn.col("g"), order_by="v",
4815+
... ).alias("dr"))
4816+
>>> result.sort(dfn.col("g"), dfn.col("v")).collect_column("dr").to_pylist()
4817+
[1, 1, 1, 2]
47354818
"""
47364819
partition_by_raw = expr_list_to_raw_expr_list(partition_by)
47374820
order_by_raw = sort_list_to_raw_sort_list(order_by)
@@ -4777,6 +4860,16 @@ def percent_rank(
47774860
... dfn.col("a"), dfn.functions.percent_rank(order_by="a").alias("pr"))
47784861
>>> result.sort(dfn.col("a")).collect_column("pr").to_pylist()
47794862
[0.0, 0.5, 1.0]
4863+
4864+
>>> df = ctx.from_pydict(
4865+
... {"g": ["a", "a", "a", "b", "b"], "v": [1, 2, 3, 4, 5]})
4866+
>>> result = df.select(
4867+
... dfn.col("g"), dfn.col("v"),
4868+
... dfn.functions.percent_rank(
4869+
... partition_by=dfn.col("g"), order_by="v",
4870+
... ).alias("pr"))
4871+
>>> result.sort(dfn.col("g"), dfn.col("v")).collect_column("pr").to_pylist()
4872+
[0.0, 0.5, 1.0, 0.0, 1.0]
47804873
"""
47814874
partition_by_raw = expr_list_to_raw_expr_list(partition_by)
47824875
order_by_raw = sort_list_to_raw_sort_list(order_by)
@@ -4825,6 +4918,16 @@ def cume_dist(
48254918
... )
48264919
>>> result.collect_column("cd").to_pylist()
48274920
[0.25..., 0.75..., 0.75..., 1.0...]
4921+
4922+
>>> df = ctx.from_pydict(
4923+
... {"g": ["a", "a", "b", "b"], "v": [1, 2, 3, 4]})
4924+
>>> result = df.select(
4925+
... dfn.col("g"), dfn.col("v"),
4926+
... dfn.functions.cume_dist(
4927+
... partition_by=dfn.col("g"), order_by="v",
4928+
... ).alias("cd"))
4929+
>>> result.sort(dfn.col("g"), dfn.col("v")).collect_column("cd").to_pylist()
4930+
[0.5, 1.0, 0.5, 1.0]
48284931
"""
48294932
partition_by_raw = expr_list_to_raw_expr_list(partition_by)
48304933
order_by_raw = sort_list_to_raw_sort_list(order_by)
@@ -4873,6 +4976,16 @@ def ntile(
48734976
... dfn.col("a"), dfn.functions.ntile(2, order_by="a").alias("nt"))
48744977
>>> result.sort(dfn.col("a")).collect_column("nt").to_pylist()
48754978
[1, 1, 2, 2]
4979+
4980+
>>> df = ctx.from_pydict(
4981+
... {"g": ["a", "a", "b", "b"], "v": [1, 2, 3, 4]})
4982+
>>> result = df.select(
4983+
... dfn.col("g"), dfn.col("v"),
4984+
... dfn.functions.ntile(
4985+
... 2, partition_by=dfn.col("g"), order_by="v",
4986+
... ).alias("nt"))
4987+
>>> result.sort(dfn.col("g"), dfn.col("v")).collect_column("nt").to_pylist()
4988+
[1, 2, 1, 2]
48764989
"""
48774990
partition_by_raw = expr_list_to_raw_expr_list(partition_by)
48784991
order_by_raw = sort_list_to_raw_sort_list(order_by)

0 commit comments

Comments
 (0)