@@ -2516,6 +2516,14 @@ def first_value(
25162516 For example::
25172517
25182518 df.aggregate([], first_value(col("a"), order_by="ts"))
2519+
2520+ Examples:
2521+ ---------
2522+ >>> ctx = dfn.SessionContext()
2523+ >>> df = ctx.from_pydict({"a": [10, 20, 30]})
2524+ >>> result = df.aggregate([], [dfn.functions.first_value(dfn.col("a")).alias("v")])
2525+ >>> result.collect_column("v")[0].as_py()
2526+ 10
25192527 """
25202528 order_by_raw = sort_list_to_raw_sort_list (order_by )
25212529 filter_raw = filter .expr if filter is not None else None
@@ -2553,6 +2561,14 @@ def last_value(
25532561 For example::
25542562
25552563 df.aggregate([], last_value(col("a"), order_by="ts"))
2564+
2565+ Examples:
2566+ ---------
2567+ >>> ctx = dfn.SessionContext()
2568+ >>> df = ctx.from_pydict({"a": [10, 20, 30]})
2569+ >>> result = df.aggregate([], [dfn.functions.last_value(dfn.col("a")).alias("v")])
2570+ >>> result.collect_column("v")[0].as_py()
2571+ 30
25562572 """
25572573 order_by_raw = sort_list_to_raw_sort_list (order_by )
25582574 filter_raw = filter .expr if filter is not None else None
@@ -2592,6 +2608,14 @@ def nth_value(
25922608 For example::
25932609
25942610 df.aggregate([], nth_value(col("a"), 2, order_by="ts"))
2611+
2612+ Examples:
2613+ ---------
2614+ >>> ctx = dfn.SessionContext()
2615+ >>> df = ctx.from_pydict({"a": [10, 20, 30]})
2616+ >>> result = df.aggregate([], [dfn.functions.nth_value(dfn.col("a"), 2).alias("v")])
2617+ >>> result.collect_column("v")[0].as_py()
2618+ 20
25952619 """
25962620 order_by_raw = sort_list_to_raw_sort_list (order_by )
25972621 filter_raw = filter .expr if filter is not None else None
@@ -2732,6 +2756,16 @@ def lead(
27322756 For example::
27332757
27342758 lead(col("b"), order_by="ts")
2759+
2760+ Examples:
2761+ ---------
2762+ >>> ctx = dfn.SessionContext()
2763+ >>> df = ctx.from_pydict({"a": [1, 2, 3]})
2764+ >>> result = df.select(
2765+ ... dfn.col("a"), dfn.functions.lead(dfn.col("a"), shift_offset=1,
2766+ ... default_value=0, order_by="a").alias("lead"))
2767+ >>> result.sort(dfn.col("a")).collect_column("lead").to_pylist()
2768+ [2, 3, 0]
27352769 """
27362770 if not isinstance (default_value , pa .Scalar ) and default_value is not None :
27372771 default_value = pa .scalar (default_value )
@@ -2787,6 +2821,16 @@ def lag(
27872821 For example::
27882822
27892823 lag(col("b"), order_by="ts")
2824+
2825+ Examples:
2826+ ---------
2827+ >>> ctx = dfn.SessionContext()
2828+ >>> df = ctx.from_pydict({"a": [1, 2, 3]})
2829+ >>> result = df.select(
2830+ ... dfn.col("a"), dfn.functions.lag(dfn.col("a"), shift_offset=1,
2831+ ... default_value=0, order_by="a").alias("lag"))
2832+ >>> result.sort(dfn.col("a")).collect_column("lag").to_pylist()
2833+ [0, 1, 2]
27902834 """
27912835 if not isinstance (default_value , pa .Scalar ):
27922836 default_value = pa .scalar (default_value )
@@ -2832,6 +2876,15 @@ def row_number(
28322876 For example::
28332877
28342878 row_number(order_by="points")
2879+
2880+ Examples:
2881+ ---------
2882+ >>> ctx = dfn.SessionContext()
2883+ >>> df = ctx.from_pydict({"a": [10, 20, 30]})
2884+ >>> result = df.select(
2885+ ... dfn.col("a"), dfn.functions.row_number(order_by="a").alias("rn"))
2886+ >>> result.sort(dfn.col("a")).collect_column("rn").to_pylist()
2887+ [1, 2, 3]
28352888 """
28362889 partition_by_raw = expr_list_to_raw_expr_list (partition_by )
28372890 order_by_raw = sort_list_to_raw_sort_list (order_by )
@@ -2876,6 +2929,14 @@ def rank(
28762929 For example::
28772930
28782931 rank(order_by="points")
2932+
2933+ Examples:
2934+ ---------
2935+ >>> ctx = dfn.SessionContext()
2936+ >>> df = ctx.from_pydict({"a": [10, 10, 20]})
2937+ >>> result = df.select(dfn.col("a"), dfn.functions.rank(order_by="a").alias("rnk"))
2938+ >>> result.sort(dfn.col("a")).collect_column("rnk").to_pylist()
2939+ [1, 1, 3]
28792940 """
28802941 partition_by_raw = expr_list_to_raw_expr_list (partition_by )
28812942 order_by_raw = sort_list_to_raw_sort_list (order_by )
@@ -2915,6 +2976,15 @@ def dense_rank(
29152976 For example::
29162977
29172978 dense_rank(order_by="points")
2979+
2980+ Examples:
2981+ ---------
2982+ >>> ctx = dfn.SessionContext()
2983+ >>> df = ctx.from_pydict({"a": [10, 10, 20]})
2984+ >>> result = df.select(
2985+ ... dfn.col("a"), dfn.functions.dense_rank(order_by="a").alias("dr"))
2986+ >>> result.sort(dfn.col("a")).collect_column("dr").to_pylist()
2987+ [1, 1, 2]
29182988 """
29192989 partition_by_raw = expr_list_to_raw_expr_list (partition_by )
29202990 order_by_raw = sort_list_to_raw_sort_list (order_by )
@@ -2955,6 +3025,15 @@ def percent_rank(
29553025 For example::
29563026
29573027 percent_rank(order_by="points")
3028+
3029+ Examples:
3030+ ---------
3031+ >>> ctx = dfn.SessionContext()
3032+ >>> df = ctx.from_pydict({"a": [10, 20, 30]})
3033+ >>> result = df.select(
3034+ ... dfn.col("a"), dfn.functions.percent_rank(order_by="a").alias("pr"))
3035+ >>> result.sort(dfn.col("a")).collect_column("pr").to_pylist()
3036+ [0.0, 0.5, 1.0]
29583037 """
29593038 partition_by_raw = expr_list_to_raw_expr_list (partition_by )
29603039 order_by_raw = sort_list_to_raw_sort_list (order_by )
@@ -2995,6 +3074,22 @@ def cume_dist(
29953074 For example::
29963075
29973076 cume_dist(order_by="points")
3077+
3078+ Examples:
3079+ ---------
3080+ >>> ctx = dfn.SessionContext()
3081+ >>> df = ctx.from_pydict({"a": [10, 10, 20]})
3082+ >>> import builtins
3083+ >>> result = df.select(
3084+ ... dfn.col("a"),
3085+ ... dfn.functions.cume_dist(
3086+ ... order_by="a"
3087+ ... ).alias("cd")
3088+ ... )
3089+ >>> [builtins.round(x, 4) for x in
3090+ ... result.sort(dfn.col("a")
3091+ ... ).collect_column("cd").to_pylist()]
3092+ [0.6667, 0.6667, 1.0]
29983093 """
29993094 partition_by_raw = expr_list_to_raw_expr_list (partition_by )
30003095 order_by_raw = sort_list_to_raw_sort_list (order_by )
@@ -3039,6 +3134,15 @@ def ntile(
30393134 For example::
30403135
30413136 ntile(3, order_by="points")
3137+
3138+ Examples:
3139+ ---------
3140+ >>> ctx = dfn.SessionContext()
3141+ >>> df = ctx.from_pydict({"a": [10, 20, 30, 40]})
3142+ >>> result = df.select(
3143+ ... dfn.col("a"), dfn.functions.ntile(2, order_by="a").alias("nt"))
3144+ >>> result.sort(dfn.col("a")).collect_column("nt").to_pylist()
3145+ [1, 1, 2, 2]
30423146 """
30433147 partition_by_raw = expr_list_to_raw_expr_list (partition_by )
30443148 order_by_raw = sort_list_to_raw_sort_list (order_by )
0 commit comments