@@ -4227,6 +4227,17 @@ def first_value(
42274227 ... )
42284228 >>> result.collect_column("v")[0].as_py()
42294229 10
4230+
4231+ >>> df = ctx.from_pydict({"a": [None, 20, 10]})
4232+ >>> result = df.aggregate(
4233+ ... [], [dfn.functions.first_value(
4234+ ... dfn.col("a"),
4235+ ... filter=dfn.col("a") > dfn.lit(10),
4236+ ... order_by="a",
4237+ ... null_treatment=dfn.common.NullTreatment.IGNORE_NULLS,
4238+ ... ).alias("v")])
4239+ >>> result.collect_column("v")[0].as_py()
4240+ 20
42304241 """
42314242 order_by_raw = sort_list_to_raw_sort_list (order_by )
42324243 filter_raw = filter .expr if filter is not None else None
@@ -4269,6 +4280,17 @@ def last_value(
42694280 ... )
42704281 >>> result.collect_column("v")[0].as_py()
42714282 30
4283+
4284+ >>> df = ctx.from_pydict({"a": [None, 20, 10]})
4285+ >>> result = df.aggregate(
4286+ ... [], [dfn.functions.last_value(
4287+ ... dfn.col("a"),
4288+ ... filter=dfn.col("a") > dfn.lit(10),
4289+ ... order_by="a",
4290+ ... null_treatment=dfn.common.NullTreatment.IGNORE_NULLS,
4291+ ... ).alias("v")])
4292+ >>> result.collect_column("v")[0].as_py()
4293+ 20
42724294 """
42734295 order_by_raw = sort_list_to_raw_sort_list (order_by )
42744296 filter_raw = filter .expr if filter is not None else None
@@ -4313,6 +4335,17 @@ def nth_value(
43134335 ... )
43144336 >>> result.collect_column("v")[0].as_py()
43154337 20
4338+
4339+ >>> df = ctx.from_pydict({"a": [None, 20, 10]})
4340+ >>> result = df.aggregate(
4341+ ... [], [dfn.functions.nth_value(
4342+ ... dfn.col("a"), 1,
4343+ ... filter=dfn.col("a") > dfn.lit(10),
4344+ ... order_by="a",
4345+ ... null_treatment=dfn.common.NullTreatment.IGNORE_NULLS,
4346+ ... ).alias("v")])
4347+ >>> result.collect_column("v")[0].as_py()
4348+ 20
43164349 """
43174350 order_by_raw = sort_list_to_raw_sort_list (order_by )
43184351 filter_raw = filter .expr if filter is not None else None
@@ -4531,6 +4564,16 @@ def lead(
45314564 ... default_value=0, order_by="a").alias("lead"))
45324565 >>> result.sort(dfn.col("a")).collect_column("lead").to_pylist()
45334566 [2, 3, 0]
4567+
4568+ >>> df = ctx.from_pydict({"g": ["a", "a", "b"], "v": [1, 2, 3]})
4569+ >>> result = df.select(
4570+ ... dfn.col("g"), dfn.col("v"),
4571+ ... dfn.functions.lead(
4572+ ... dfn.col("v"), shift_offset=1, default_value=0,
4573+ ... partition_by=dfn.col("g"), order_by="v",
4574+ ... ).alias("lead"))
4575+ >>> result.sort(dfn.col("g"), dfn.col("v")).collect_column("lead").to_pylist()
4576+ [2, 0, 0]
45344577 """
45354578 if not isinstance (default_value , pa .Scalar ) and default_value is not None :
45364579 default_value = pa .scalar (default_value )
@@ -4591,6 +4634,16 @@ def lag(
45914634 ... default_value=0, order_by="a").alias("lag"))
45924635 >>> result.sort(dfn.col("a")).collect_column("lag").to_pylist()
45934636 [0, 1, 2]
4637+
4638+ >>> df = ctx.from_pydict({"g": ["a", "a", "b"], "v": [1, 2, 3]})
4639+ >>> result = df.select(
4640+ ... dfn.col("g"), dfn.col("v"),
4641+ ... dfn.functions.lag(
4642+ ... dfn.col("v"), shift_offset=1, default_value=0,
4643+ ... partition_by=dfn.col("g"), order_by="v",
4644+ ... ).alias("lag"))
4645+ >>> result.sort(dfn.col("g"), dfn.col("v")).collect_column("lag").to_pylist()
4646+ [0, 1, 0]
45944647 """
45954648 if not isinstance (default_value , pa .Scalar ):
45964649 default_value = pa .scalar (default_value )
@@ -4640,6 +4693,16 @@ def row_number(
46404693 ... dfn.col("a"), dfn.functions.row_number(order_by="a").alias("rn"))
46414694 >>> result.sort(dfn.col("a")).collect_column("rn").to_pylist()
46424695 [1, 2, 3]
4696+
4697+ >>> df = ctx.from_pydict(
4698+ ... {"g": ["a", "a", "b", "b"], "v": [1, 2, 3, 4]})
4699+ >>> result = df.select(
4700+ ... dfn.col("g"), dfn.col("v"),
4701+ ... dfn.functions.row_number(
4702+ ... partition_by=dfn.col("g"), order_by="v",
4703+ ... ).alias("rn"))
4704+ >>> result.sort(dfn.col("g"), dfn.col("v")).collect_column("rn").to_pylist()
4705+ [1, 2, 1, 2]
46434706 """
46444707 partition_by_raw = expr_list_to_raw_expr_list (partition_by )
46454708 order_by_raw = sort_list_to_raw_sort_list (order_by )
@@ -4689,6 +4752,16 @@ def rank(
46894752 ... )
46904753 >>> result.sort(dfn.col("a")).collect_column("rnk").to_pylist()
46914754 [1, 1, 3]
4755+
4756+ >>> df = ctx.from_pydict(
4757+ ... {"g": ["a", "a", "b", "b"], "v": [1, 1, 2, 3]})
4758+ >>> result = df.select(
4759+ ... dfn.col("g"), dfn.col("v"),
4760+ ... dfn.functions.rank(
4761+ ... partition_by=dfn.col("g"), order_by="v",
4762+ ... ).alias("rnk"))
4763+ >>> result.sort(dfn.col("g"), dfn.col("v")).collect_column("rnk").to_pylist()
4764+ [1, 1, 1, 2]
46924765 """
46934766 partition_by_raw = expr_list_to_raw_expr_list (partition_by )
46944767 order_by_raw = sort_list_to_raw_sort_list (order_by )
@@ -4732,6 +4805,16 @@ def dense_rank(
47324805 ... dfn.col("a"), dfn.functions.dense_rank(order_by="a").alias("dr"))
47334806 >>> result.sort(dfn.col("a")).collect_column("dr").to_pylist()
47344807 [1, 1, 2]
4808+
4809+ >>> df = ctx.from_pydict(
4810+ ... {"g": ["a", "a", "b", "b"], "v": [1, 1, 2, 3]})
4811+ >>> result = df.select(
4812+ ... dfn.col("g"), dfn.col("v"),
4813+ ... dfn.functions.dense_rank(
4814+ ... partition_by=dfn.col("g"), order_by="v",
4815+ ... ).alias("dr"))
4816+ >>> result.sort(dfn.col("g"), dfn.col("v")).collect_column("dr").to_pylist()
4817+ [1, 1, 1, 2]
47354818 """
47364819 partition_by_raw = expr_list_to_raw_expr_list (partition_by )
47374820 order_by_raw = sort_list_to_raw_sort_list (order_by )
@@ -4777,6 +4860,16 @@ def percent_rank(
47774860 ... dfn.col("a"), dfn.functions.percent_rank(order_by="a").alias("pr"))
47784861 >>> result.sort(dfn.col("a")).collect_column("pr").to_pylist()
47794862 [0.0, 0.5, 1.0]
4863+
4864+ >>> df = ctx.from_pydict(
4865+ ... {"g": ["a", "a", "a", "b", "b"], "v": [1, 2, 3, 4, 5]})
4866+ >>> result = df.select(
4867+ ... dfn.col("g"), dfn.col("v"),
4868+ ... dfn.functions.percent_rank(
4869+ ... partition_by=dfn.col("g"), order_by="v",
4870+ ... ).alias("pr"))
4871+ >>> result.sort(dfn.col("g"), dfn.col("v")).collect_column("pr").to_pylist()
4872+ [0.0, 0.5, 1.0, 0.0, 1.0]
47804873 """
47814874 partition_by_raw = expr_list_to_raw_expr_list (partition_by )
47824875 order_by_raw = sort_list_to_raw_sort_list (order_by )
@@ -4825,6 +4918,16 @@ def cume_dist(
48254918 ... )
48264919 >>> result.collect_column("cd").to_pylist()
48274920 [0.25..., 0.75..., 0.75..., 1.0...]
4921+
4922+ >>> df = ctx.from_pydict(
4923+ ... {"g": ["a", "a", "b", "b"], "v": [1, 2, 3, 4]})
4924+ >>> result = df.select(
4925+ ... dfn.col("g"), dfn.col("v"),
4926+ ... dfn.functions.cume_dist(
4927+ ... partition_by=dfn.col("g"), order_by="v",
4928+ ... ).alias("cd"))
4929+ >>> result.sort(dfn.col("g"), dfn.col("v")).collect_column("cd").to_pylist()
4930+ [0.5, 1.0, 0.5, 1.0]
48284931 """
48294932 partition_by_raw = expr_list_to_raw_expr_list (partition_by )
48304933 order_by_raw = sort_list_to_raw_sort_list (order_by )
@@ -4873,6 +4976,16 @@ def ntile(
48734976 ... dfn.col("a"), dfn.functions.ntile(2, order_by="a").alias("nt"))
48744977 >>> result.sort(dfn.col("a")).collect_column("nt").to_pylist()
48754978 [1, 1, 2, 2]
4979+
4980+ >>> df = ctx.from_pydict(
4981+ ... {"g": ["a", "a", "b", "b"], "v": [1, 2, 3, 4]})
4982+ >>> result = df.select(
4983+ ... dfn.col("g"), dfn.col("v"),
4984+ ... dfn.functions.ntile(
4985+ ... 2, partition_by=dfn.col("g"), order_by="v",
4986+ ... ).alias("nt"))
4987+ >>> result.sort(dfn.col("g"), dfn.col("v")).collect_column("nt").to_pylist()
4988+ [1, 2, 1, 2]
48764989 """
48774990 partition_by_raw = expr_list_to_raw_expr_list (partition_by )
48784991 order_by_raw = sort_list_to_raw_sort_list (order_by )
0 commit comments