Skip to content

Commit fd4562e

Browse files
committed
Cover optional arguments for scalar functions
1 parent d12b8ce commit fd4562e

File tree

1 file changed

+105
-0
lines changed

1 file changed

+105
-0
lines changed

python/datafusion/functions.py

Lines changed: 105 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -405,6 +405,15 @@ def in_list(arg: Expr, values: list[Expr], negated: bool = False) -> Expr:
405405
... )
406406
>>> result.collect_column("in").to_pylist()
407407
[True, False, True]
408+
409+
>>> result = df.select(
410+
... dfn.functions.in_list(
411+
... dfn.col("a"), [dfn.lit(1), dfn.lit(3)],
412+
... negated=True,
413+
... ).alias("not_in")
414+
... )
415+
>>> result.collect_column("not_in").to_pylist()
416+
[False, True, False]
408417
"""
409418
values = [v.expr for v in values]
410419
return Expr(f.in_list(arg.expr, values, negated))
@@ -469,6 +478,11 @@ def order_by(expr: Expr, ascending: bool = True, nulls_first: bool = True) -> So
469478
>>> sort_expr = dfn.functions.order_by(dfn.col("a"), ascending=False)
470479
>>> sort_expr.ascending()
471480
False
481+
482+
>>> sort_expr = dfn.functions.order_by(
483+
... dfn.col("a"), ascending=True, nulls_first=False)
484+
>>> sort_expr.nulls_first()
485+
False
472486
"""
473487
return SortExpr(expr, ascending=ascending, nulls_first=nulls_first)
474488

@@ -488,6 +502,13 @@ def alias(expr: Expr, name: str, metadata: dict[str, str] | None = None) -> Expr
488502
... dfn.functions.alias(dfn.col("a"), "b")
489503
... ).collect_column("b")[0].as_py()
490504
1
505+
506+
>>> df.select(
507+
... dfn.functions.alias(
508+
... dfn.col("a"), "b", metadata={"info": "test"}
509+
... )
510+
... ).collect_column("b")[0].as_py()
511+
1
491512
"""
492513
return Expr(f.alias(expr.expr, name, metadata))
493514

@@ -1161,6 +1182,13 @@ def lpad(string: Expr, count: Expr, characters: Expr | None = None) -> Expr:
11611182
'the ca'
11621183
>>> lpad_df.collect_column("lpad")[1].as_py()
11631184
' a hat'
1185+
1186+
>>> result = df.select(
1187+
... dfn.functions.lpad(
1188+
... dfn.col("a"), dfn.lit(10), dfn.lit(".")
1189+
... ).alias("lpad"))
1190+
>>> result.collect_column("lpad")[0].as_py()
1191+
'...the cat'
11641192
"""
11651193
characters = characters if characters is not None else Expr.literal(" ")
11661194
return Expr(f.lpad(string.expr, count.expr, characters.expr))
@@ -1341,6 +1369,17 @@ def regexp_like(string: Expr, regex: Expr, flags: Expr | None = None) -> Expr:
13411369
... )
13421370
>>> result.collect_column("m")[0].as_py()
13431371
True
1372+
1373+
Use ``flags`` for case-insensitive matching:
1374+
1375+
>>> result = df.select(
1376+
... dfn.functions.regexp_like(
1377+
... dfn.col("a"), dfn.lit("HELLO"),
1378+
... flags=dfn.lit("i"),
1379+
... ).alias("m")
1380+
... )
1381+
>>> result.collect_column("m")[0].as_py()
1382+
True
13441383
"""
13451384
if flags is not None:
13461385
flags = flags.expr
@@ -1363,6 +1402,17 @@ def regexp_match(string: Expr, regex: Expr, flags: Expr | None = None) -> Expr:
13631402
... )
13641403
>>> result.collect_column("m")[0].as_py()
13651404
['42']
1405+
1406+
Use ``flags`` for case-insensitive matching:
1407+
1408+
>>> result = df.select(
1409+
... dfn.functions.regexp_match(
1410+
... dfn.col("a"), dfn.lit("(HELLO)"),
1411+
... flags=dfn.lit("i"),
1412+
... ).alias("m")
1413+
... )
1414+
>>> result.collect_column("m")[0].as_py()
1415+
['hello']
13661416
"""
13671417
if flags is not None:
13681418
flags = flags.expr
@@ -1391,6 +1441,18 @@ def regexp_replace(
13911441
... )
13921442
>>> result.collect_column("r")[0].as_py()
13931443
'hello XX'
1444+
1445+
Use the ``g`` flag to replace all occurrences:
1446+
1447+
>>> df = ctx.from_pydict({"a": ["a1 b2 c3"]})
1448+
>>> result = df.select(
1449+
... dfn.functions.regexp_replace(
1450+
... dfn.col("a"), dfn.lit("\\d+"),
1451+
... dfn.lit("X"), flags=dfn.lit("g"),
1452+
... ).alias("r")
1453+
... )
1454+
>>> result.collect_column("r")[0].as_py()
1455+
'aX bX cX'
13941456
"""
13951457
if flags is not None:
13961458
flags = flags.expr
@@ -1412,6 +1474,17 @@ def regexp_count(
14121474
... dfn.functions.regexp_count(dfn.col("a"), dfn.lit("abc")).alias("c"))
14131475
>>> result.collect_column("c")[0].as_py()
14141476
2
1477+
1478+
Use ``start`` to begin searching at a given position, and
1479+
``flags`` for case-insensitive matching:
1480+
1481+
>>> result = df.select(
1482+
... dfn.functions.regexp_count(
1483+
... dfn.col("a"), dfn.lit("ABC"),
1484+
... start=dfn.lit(4), flags=dfn.lit("i"),
1485+
... ).alias("c"))
1486+
>>> result.collect_column("c")[0].as_py()
1487+
1
14151488
"""
14161489
if flags is not None:
14171490
flags = flags.expr
@@ -1447,6 +1520,31 @@ def regexp_instr(
14471520
... )
14481521
>>> result.collect_column("pos")[0].as_py()
14491522
7
1523+
1524+
Use ``start`` to search from a given position, ``n`` to select the
1525+
nth occurrence, and ``flags`` for case-insensitive matching:
1526+
1527+
>>> df = ctx.from_pydict({"a": ["abc ABC abc"]})
1528+
>>> result = df.select(
1529+
... dfn.functions.regexp_instr(
1530+
... dfn.col("a"), dfn.lit("abc"),
1531+
... start=dfn.lit(2), n=dfn.lit(1),
1532+
... flags=dfn.lit("i"),
1533+
... ).alias("pos")
1534+
... )
1535+
>>> result.collect_column("pos")[0].as_py()
1536+
5
1537+
1538+
Use ``sub_expr`` to get the position of a capture group:
1539+
1540+
>>> result = df.select(
1541+
... dfn.functions.regexp_instr(
1542+
... dfn.col("a"), dfn.lit("(abc)"),
1543+
... sub_expr=dfn.lit(1),
1544+
... ).alias("pos")
1545+
... )
1546+
>>> result.collect_column("pos")[0].as_py()
1547+
1
14501548
"""
14511549
start = start.expr if start is not None else None
14521550
n = n.expr if n is not None else None
@@ -2196,6 +2294,13 @@ def trunc(num: Expr, precision: Expr | None = None) -> Expr:
21962294
>>> result = df.select(dfn.functions.trunc(dfn.col("a")).alias("t"))
21972295
>>> result.collect_column("t")[0].as_py()
21982296
1.0
2297+
2298+
>>> result = df.select(
2299+
... dfn.functions.trunc(
2300+
... dfn.col("a"), precision=dfn.lit(2)
2301+
... ).alias("t"))
2302+
>>> result.collect_column("t")[0].as_py()
2303+
1.56
21992304
"""
22002305
if precision is not None:
22012306
return Expr(f.trunc(num.expr, precision.expr))

0 commit comments

Comments
 (0)