@@ -405,6 +405,15 @@ def in_list(arg: Expr, values: list[Expr], negated: bool = False) -> Expr:
405405 ... )
406406 >>> result.collect_column("in").to_pylist()
407407 [True, False, True]
408+
409+ >>> result = df.select(
410+ ... dfn.functions.in_list(
411+ ... dfn.col("a"), [dfn.lit(1), dfn.lit(3)],
412+ ... negated=True,
413+ ... ).alias("not_in")
414+ ... )
415+ >>> result.collect_column("not_in").to_pylist()
416+ [False, True, False]
408417 """
409418 values = [v.expr for v in values]
410419 return Expr(f.in_list(arg.expr, values, negated))
@@ -469,6 +478,11 @@ def order_by(expr: Expr, ascending: bool = True, nulls_first: bool = True) -> So
469478 >>> sort_expr = dfn.functions.order_by(dfn.col("a"), ascending=False)
470479 >>> sort_expr.ascending()
471480 False
481+
482+ >>> sort_expr = dfn.functions.order_by(
483+ ... dfn.col("a"), ascending=True, nulls_first=False)
484+ >>> sort_expr.nulls_first()
485+ False
472486 """
473487 return SortExpr(expr, ascending=ascending, nulls_first=nulls_first)
474488
@@ -488,6 +502,13 @@ def alias(expr: Expr, name: str, metadata: dict[str, str] | None = None) -> Expr
488502 ... dfn.functions.alias(dfn.col("a"), "b")
489503 ... ).collect_column("b")[0].as_py()
490504 1
505+
506+ >>> df.select(
507+ ... dfn.functions.alias(
508+ ... dfn.col("a"), "b", metadata={"info": "test"}
509+ ... )
510+ ... ).collect_column("b")[0].as_py()
511+ 1
491512 """
492513 return Expr(f.alias(expr.expr, name, metadata))
493514
@@ -1161,6 +1182,13 @@ def lpad(string: Expr, count: Expr, characters: Expr | None = None) -> Expr:
11611182 'the ca'
11621183 >>> lpad_df.collect_column("lpad")[1].as_py()
11631184 ' a hat'
1185+
1186+ >>> result = df.select(
1187+ ... dfn.functions.lpad(
1188+ ... dfn.col("a"), dfn.lit(10), dfn.lit(".")
1189+ ... ).alias("lpad"))
1190+ >>> result.collect_column("lpad")[0].as_py()
1191+ '...the cat'
11641192 """
11651193 characters = characters if characters is not None else Expr.literal(" ")
11661194 return Expr(f.lpad(string.expr, count.expr, characters.expr))
@@ -1341,6 +1369,17 @@ def regexp_like(string: Expr, regex: Expr, flags: Expr | None = None) -> Expr:
13411369 ... )
13421370 >>> result.collect_column("m")[0].as_py()
13431371 True
1372+
1373+ Use ``flags`` for case-insensitive matching:
1374+
1375+ >>> result = df.select(
1376+ ... dfn.functions.regexp_like(
1377+ ... dfn.col("a"), dfn.lit("HELLO"),
1378+ ... flags=dfn.lit("i"),
1379+ ... ).alias("m")
1380+ ... )
1381+ >>> result.collect_column("m")[0].as_py()
1382+ True
13441383 """
13451384 if flags is not None:
13461385 flags = flags.expr
@@ -1363,6 +1402,17 @@ def regexp_match(string: Expr, regex: Expr, flags: Expr | None = None) -> Expr:
13631402 ... )
13641403 >>> result.collect_column("m")[0].as_py()
13651404 ['42']
1405+
1406+ Use ``flags`` for case-insensitive matching:
1407+
1408+ >>> result = df.select(
1409+ ... dfn.functions.regexp_match(
1410+ ... dfn.col("a"), dfn.lit("(HELLO)"),
1411+ ... flags=dfn.lit("i"),
1412+ ... ).alias("m")
1413+ ... )
1414+ >>> result.collect_column("m")[0].as_py()
1415+ ['hello']
13661416 """
13671417 if flags is not None:
13681418 flags = flags.expr
@@ -1391,6 +1441,18 @@ def regexp_replace(
13911441 ... )
13921442 >>> result.collect_column("r")[0].as_py()
13931443 'hello XX'
1444+
1445+ Use the ``g`` flag to replace all occurrences:
1446+
1447+ >>> df = ctx.from_pydict({"a": ["a1 b2 c3"]})
1448+ >>> result = df.select(
1449+ ... dfn.functions.regexp_replace(
1450+ ... dfn.col("a"), dfn.lit("\\d+"),
1451+ ... dfn.lit("X"), flags=dfn.lit("g"),
1452+ ... ).alias("r")
1453+ ... )
1454+ >>> result.collect_column("r")[0].as_py()
1455+ 'aX bX cX'
13941456 """
13951457 if flags is not None:
13961458 flags = flags.expr
@@ -1412,6 +1474,17 @@ def regexp_count(
14121474 ... dfn.functions.regexp_count(dfn.col("a"), dfn.lit("abc")).alias("c"))
14131475 >>> result.collect_column("c")[0].as_py()
14141476 2
1477+
1478+ Use ``start`` to begin searching from a position, and
1479+ ``flags`` for case-insensitive matching:
1480+
1481+ >>> result = df.select(
1482+ ... dfn.functions.regexp_count(
1483+ ... dfn.col("a"), dfn.lit("ABC"),
1484+ ... start=dfn.lit(4), flags=dfn.lit("i"),
1485+ ... ).alias("c"))
1486+ >>> result.collect_column("c")[0].as_py()
1487+ 1
14151488 """
14161489 if flags is not None:
14171490 flags = flags.expr
@@ -1447,6 +1520,31 @@ def regexp_instr(
14471520 ... )
14481521 >>> result.collect_column("pos")[0].as_py()
14491522 7
1523+
1524+ Use ``start`` to search from a position, ``n`` for the
1525+ nth occurrence, and ``flags`` for case-insensitive mode:
1526+
1527+ >>> df = ctx.from_pydict({"a": ["abc ABC abc"]})
1528+ >>> result = df.select(
1529+ ... dfn.functions.regexp_instr(
1530+ ... dfn.col("a"), dfn.lit("abc"),
1531+ ... start=dfn.lit(2), n=dfn.lit(1),
1532+ ... flags=dfn.lit("i"),
1533+ ... ).alias("pos")
1534+ ... )
1535+ >>> result.collect_column("pos")[0].as_py()
1536+ 5
1537+
1538+ Use ``sub_expr`` to get the position of a capture group:
1539+
1540+ >>> result = df.select(
1541+ ... dfn.functions.regexp_instr(
1542+ ... dfn.col("a"), dfn.lit("(abc)"),
1543+ ... sub_expr=dfn.lit(1),
1544+ ... ).alias("pos")
1545+ ... )
1546+ >>> result.collect_column("pos")[0].as_py()
1547+ 1
14501548 """
14511549 start = start.expr if start is not None else None
14521550 n = n.expr if n is not None else None
@@ -2196,6 +2294,13 @@ def trunc(num: Expr, precision: Expr | None = None) -> Expr:
21962294 >>> result = df.select(dfn.functions.trunc(dfn.col("a")).alias("t"))
21972295 >>> result.collect_column("t")[0].as_py()
21982296 1.0
2297+
2298+ >>> result = df.select(
2299+ ... dfn.functions.trunc(
2300+ ... dfn.col("a"), precision=dfn.lit(2)
2301+ ... ).alias("t"))
2302+ >>> result.collect_column("t")[0].as_py()
2303+ 1.56
21992304 """
22002305 if precision is not None:
22012306 return Expr(f.trunc(num.expr, precision.expr))
0 commit comments