@@ -3323,10 +3323,6 @@ def array_agg(
33233323 filter: If provided, only compute against rows for which the filter is True
33243324 order_by: Order the resultant array values. Accepts column names or expressions.
33253325
3326- For example::
3327-
3328- df.aggregate([], array_agg(col("a"), order_by="b"))
3329-
33303326 Examples:
33313327 ---------
33323328 >>> ctx = dfn.SessionContext()
@@ -4047,9 +4043,14 @@ def first_value(
40474043 column names or expressions.
40484044 null_treatment: Assign whether to respect or ignore null values.
40494045
4050- For example::
4051-
4052- df.aggregate([], first_value(col("a"), order_by="ts"))
4046+ Examples:
4047+ >>> ctx = dfn.SessionContext()
4048+ >>> df = ctx.from_pydict({"a": [10, 20, 30]})
4049+ >>> result = df.aggregate(
4050+ ... [], [dfn.functions.first_value(dfn.col("a")).alias("v")]
4051+ ... )
4052+ >>> result.collect_column("v")[0].as_py()
4053+ 10
40534054 """
40544055 order_by_raw = sort_list_to_raw_sort_list (order_by )
40554056 filter_raw = filter .expr if filter is not None else None
@@ -4084,9 +4085,14 @@ def last_value(
40844085 column names or expressions.
40854086 null_treatment: Assign whether to respect or ignore null values.
40864087
4087- For example::
4088-
4089- df.aggregate([], last_value(col("a"), order_by="ts"))
4088+ Examples:
4089+ >>> ctx = dfn.SessionContext()
4090+ >>> df = ctx.from_pydict({"a": [10, 20, 30]})
4091+ >>> result = df.aggregate(
4092+ ... [], [dfn.functions.last_value(dfn.col("a")).alias("v")]
4093+ ... )
4094+ >>> result.collect_column("v")[0].as_py()
4095+ 30
40904096 """
40914097 order_by_raw = sort_list_to_raw_sort_list (order_by )
40924098 filter_raw = filter .expr if filter is not None else None
@@ -4123,9 +4129,14 @@ def nth_value(
41234129 column names or expressions.
41244130 null_treatment: Assign whether to respect or ignore null values.
41254131
4126- For example::
4127-
4128- df.aggregate([], nth_value(col("a"), 2, order_by="ts"))
4132+ Examples:
4133+ >>> ctx = dfn.SessionContext()
4134+ >>> df = ctx.from_pydict({"a": [10, 20, 30]})
4135+ >>> result = df.aggregate(
4136+ ... [], [dfn.functions.nth_value(dfn.col("a"), 2).alias("v")]
4137+ ... )
4138+ >>> result.collect_column("v")[0].as_py()
4139+ 20
41294140 """
41304141 order_by_raw = sort_list_to_raw_sort_list (order_by )
41314142 filter_raw = filter .expr if filter is not None else None
@@ -4303,9 +4314,14 @@ def lead(
43034314 order_by: Set ordering within the window frame. Accepts
43044315 column names or expressions.
43054316
4306- For example::
4307-
4308- lead(col("b"), order_by="ts")
4317+ Examples:
4318+ >>> ctx = dfn.SessionContext()
4319+ >>> df = ctx.from_pydict({"a": [1, 2, 3]})
4320+ >>> result = df.select(
4321+ ... dfn.col("a"), dfn.functions.lead(dfn.col("a"), shift_offset=1,
4322+ ... default_value=0, order_by="a").alias("lead"))
4323+ >>> result.sort(dfn.col("a")).collect_column("lead").to_pylist()
4324+ [2, 3, 0]
43094325 """
43104326 if not isinstance (default_value , pa .Scalar ) and default_value is not None :
43114327 default_value = pa .scalar (default_value )
@@ -4358,9 +4374,14 @@ def lag(
43584374 order_by: Set ordering within the window frame. Accepts
43594375 column names or expressions.
43604376
4361- For example::
4362-
4363- lag(col("b"), order_by="ts")
4377+ Examples:
4378+ >>> ctx = dfn.SessionContext()
4379+ >>> df = ctx.from_pydict({"a": [1, 2, 3]})
4380+ >>> result = df.select(
4381+ ... dfn.col("a"), dfn.functions.lag(dfn.col("a"), shift_offset=1,
4382+ ... default_value=0, order_by="a").alias("lag"))
4383+ >>> result.sort(dfn.col("a")).collect_column("lag").to_pylist()
4384+ [0, 1, 2]
43644385 """
43654386 if not isinstance (default_value , pa .Scalar ):
43664387 default_value = pa .scalar (default_value )
@@ -4403,9 +4424,13 @@ def row_number(
44034424 order_by: Set ordering within the window frame. Accepts
44044425 column names or expressions.
44054426
4406- For example::
4407-
4408- row_number(order_by="points")
4427+ Examples:
4428+ >>> ctx = dfn.SessionContext()
4429+ >>> df = ctx.from_pydict({"a": [10, 20, 30]})
4430+ >>> result = df.select(
4431+ ... dfn.col("a"), dfn.functions.row_number(order_by="a").alias("rn"))
4432+ >>> result.sort(dfn.col("a")).collect_column("rn").to_pylist()
4433+ [1, 2, 3]
44094434 """
44104435 partition_by_raw = expr_list_to_raw_expr_list (partition_by )
44114436 order_by_raw = sort_list_to_raw_sort_list (order_by )
@@ -4447,9 +4472,14 @@ def rank(
44474472 order_by: Set ordering within the window frame. Accepts
44484473 column names or expressions.
44494474
4450- For example::
4451-
4452- rank(order_by="points")
4475+ Examples:
4476+ >>> ctx = dfn.SessionContext()
4477+ >>> df = ctx.from_pydict({"a": [10, 10, 20]})
4478+ >>> result = df.select(
4479+ ... dfn.col("a"), dfn.functions.rank(order_by="a").alias("rnk")
4480+ ... )
4481+ >>> result.sort(dfn.col("a")).collect_column("rnk").to_pylist()
4482+ [1, 1, 3]
44534483 """
44544484 partition_by_raw = expr_list_to_raw_expr_list (partition_by )
44554485 order_by_raw = sort_list_to_raw_sort_list (order_by )
@@ -4486,9 +4516,13 @@ def dense_rank(
44864516 order_by: Set ordering within the window frame. Accepts
44874517 column names or expressions.
44884518
4489- For example::
4490-
4491- dense_rank(order_by="points")
4519+ Examples:
4520+ >>> ctx = dfn.SessionContext()
4521+ >>> df = ctx.from_pydict({"a": [10, 10, 20]})
4522+ >>> result = df.select(
4523+ ... dfn.col("a"), dfn.functions.dense_rank(order_by="a").alias("dr"))
4524+ >>> result.sort(dfn.col("a")).collect_column("dr").to_pylist()
4525+ [1, 1, 2]
44924526 """
44934527 partition_by_raw = expr_list_to_raw_expr_list (partition_by )
44944528 order_by_raw = sort_list_to_raw_sort_list (order_by )
@@ -4526,9 +4560,14 @@ def percent_rank(
45264560 order_by: Set ordering within the window frame. Accepts
45274561 column names or expressions.
45284562
4529- For example::
45304563
4531- percent_rank(order_by="points")
4564+ Examples:
4565+ >>> ctx = dfn.SessionContext()
4566+ >>> df = ctx.from_pydict({"a": [10, 20, 30]})
4567+ >>> result = df.select(
4568+ ... dfn.col("a"), dfn.functions.percent_rank(order_by="a").alias("pr"))
4569+ >>> result.sort(dfn.col("a")).collect_column("pr").to_pylist()
4570+ [0.0, 0.5, 1.0]
45324571 """
45334572 partition_by_raw = expr_list_to_raw_expr_list (partition_by )
45344573 order_by_raw = sort_list_to_raw_sort_list (order_by )
@@ -4566,9 +4605,17 @@ def cume_dist(
45664605 order_by: Set ordering within the window frame. Accepts
45674606 column names or expressions.
45684607
4569- For example::
4570-
4571- cume_dist(order_by="points")
4608+ Examples:
4609+ >>> ctx = dfn.SessionContext()
4610+ >>> df = ctx.from_pydict({"a": [1., 2., 2., 3.]})
4611+ >>> result = df.select(
4612+ ... dfn.col("a"),
4613+ ... dfn.functions.cume_dist(
4614+ ... order_by="a"
4615+ ... ).alias("cd")
4616+ ... )
4617+ >>> result.sort(dfn.col("a")).collect_column("cd").to_pylist()
4618+ [0.25..., 0.75..., 0.75..., 1.0...]
45724619 """
45734620 partition_by_raw = expr_list_to_raw_expr_list (partition_by )
45744621 order_by_raw = sort_list_to_raw_sort_list (order_by )
@@ -4610,9 +4657,13 @@ def ntile(
46104657 order_by: Set ordering within the window frame. Accepts
46114658 column names or expressions.
46124659
4613- For example::
4614-
4615- ntile(3, order_by="points")
4660+ Examples:
4661+ >>> ctx = dfn.SessionContext()
4662+ >>> df = ctx.from_pydict({"a": [10, 20, 30, 40]})
4663+ >>> result = df.select(
4664+ ... dfn.col("a"), dfn.functions.ntile(2, order_by="a").alias("nt"))
4665+ >>> result.sort(dfn.col("a")).collect_column("nt").to_pylist()
4666+ [1, 1, 2, 2]
46164667 """
46174668 partition_by_raw = expr_list_to_raw_expr_list (partition_by )
46184669 order_by_raw = sort_list_to_raw_sort_list (order_by )
@@ -4648,10 +4699,6 @@ def string_agg(
46484699 order_by: Set the ordering of the expression to evaluate. Accepts
46494700 column names or expressions.
46504701
4651- For example::
4652-
4653- df.aggregate([], string_agg(col("a"), ",", order_by="b"))
4654-
46554702 Examples:
46564703 ---------
46574704 >>> ctx = dfn.SessionContext()
0 commit comments