Skip to content

Commit 4bebc2c

Browse files
ntjohnson1 and claude committed
Add docstring examples for Common utility functions
Add example usage to docstrings for Common utility functions to improve documentation. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 1160d5a commit 4bebc2c

File tree

1 file changed

+169
-10
lines changed

1 file changed

+169
-10
lines changed

python/datafusion/functions.py

Lines changed: 169 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -295,37 +295,94 @@
295295

296296

297297
def isnan(expr: Expr) -> Expr:
298-
"""Returns true if a given number is +NaN or -NaN otherwise returns false."""
298+
"""Returns true if a given number is +NaN or -NaN; otherwise returns false.
299+
300+
Examples:
301+
---------
302+
>>> ctx = dfn.SessionContext()
303+
>>> df = ctx.from_pydict({"a": [1.0, np.nan]})
304+
>>> result = df.select(dfn.functions.isnan(dfn.col("a")).alias("isnan"))
305+
>>> result.collect_column("isnan")[1].as_py()
306+
True
307+
"""
299308
return Expr(f.isnan(expr.expr))
300309

301310

302311
def nullif(expr1: Expr, expr2: Expr) -> Expr:
303312
"""Returns NULL if expr1 equals expr2; otherwise it returns expr1.
304313
305314
This can be used to perform the inverse operation of the COALESCE expression.
315+
316+
Examples:
317+
---------
318+
>>> ctx = dfn.SessionContext()
319+
>>> df = ctx.from_pydict({"a": [1, 2], "b": [1, 3]})
320+
>>> result = df.select(
321+
... dfn.functions.nullif(dfn.col("a"), dfn.col("b")).alias("nullif"))
322+
>>> result.collect_column("nullif").to_pylist()
323+
[None, 2]
306324
"""
307325
return Expr(f.nullif(expr1.expr, expr2.expr))
308326

309327

310328
def encode(expr: Expr, encoding: Expr) -> Expr:
311-
"""Encode the ``input``, using the ``encoding``. encoding can be base64 or hex."""
329+
"""Encode ``expr`` using ``encoding``, which can be base64 or hex.
330+
331+
Examples:
332+
---------
333+
>>> ctx = dfn.SessionContext()
334+
>>> df = ctx.from_pydict({"a": ["hello"]})
335+
>>> result = df.select(
336+
... dfn.functions.encode(dfn.col("a"), dfn.lit("base64")).alias("enc"))
337+
>>> result.collect_column("enc")[0].as_py()
338+
'aGVsbG8'
339+
"""
312340
return Expr(f.encode(expr.expr, encoding.expr))
313341

314342

315343
def decode(expr: Expr, encoding: Expr) -> Expr:
316-
"""Decode the ``input``, using the ``encoding``. encoding can be base64 or hex."""
344+
"""Decode ``expr`` using ``encoding``, which can be base64 or hex.
345+
346+
Examples:
347+
---------
348+
>>> ctx = dfn.SessionContext()
349+
>>> df = ctx.from_pydict({"a": ["aGVsbG8="]})
350+
>>> result = df.select(
351+
... dfn.functions.decode(dfn.col("a"), dfn.lit("base64")).alias("dec"))
352+
>>> result.collect_column("dec")[0].as_py()
353+
b'hello'
354+
"""
317355
return Expr(f.decode(expr.expr, encoding.expr))
318356

319357

320358
def array_to_string(expr: Expr, delimiter: Expr) -> Expr:
321-
"""Converts each element to its text representation."""
359+
"""Converts each element to its text representation, joined by ``delimiter``.
360+
361+
Examples:
362+
---------
363+
>>> ctx = dfn.SessionContext()
364+
>>> df = ctx.from_pydict({"a": [[1, 2, 3]]})
365+
>>> result = df.select(
366+
... dfn.functions.array_to_string(dfn.col("a"), dfn.lit(",")).alias("s"))
367+
>>> result.collect_column("s")[0].as_py()
368+
'1,2,3'
369+
"""
322370
return Expr(f.array_to_string(expr.expr, delimiter.expr.cast(pa.string())))
323371

324372

325373
def array_join(expr: Expr, delimiter: Expr) -> Expr:
326374
"""Converts each element to its text representation.
327375
328376
This is an alias for :py:func:`array_to_string`.
377+
378+
Examples:
379+
---------
380+
>>> ctx = dfn.SessionContext()
381+
>>> df = ctx.from_pydict({"a": [[1, 2, 3]]})
382+
>>> result = df.select(
383+
... dfn.functions.array_join(dfn.col("a"), dfn.lit("-")).alias("s"))
384+
>>> result.collect_column("s")[0].as_py()
385+
'1-2-3'
329386
"""
330387
return array_to_string(expr, delimiter)
331388

@@ -334,6 +391,15 @@ def list_to_string(expr: Expr, delimiter: Expr) -> Expr:
334391
"""Converts each element to its text representation.
335392
336393
This is an alias for :py:func:`array_to_string`.
394+
395+
Examples:
396+
---------
397+
>>> ctx = dfn.SessionContext()
398+
>>> df = ctx.from_pydict({"a": [[4, 5, 6]]})
399+
>>> result = df.select(
400+
... dfn.functions.list_to_string(dfn.col("a"), dfn.lit(",")).alias("s"))
401+
>>> result.collect_column("s")[0].as_py()
402+
'4,5,6'
337403
"""
338404
return array_to_string(expr, delimiter)
339405

@@ -342,12 +408,31 @@ def list_join(expr: Expr, delimiter: Expr) -> Expr:
342408
"""Converts each element to its text representation.
343409
344410
This is an alias for :py:func:`array_to_string`.
411+
412+
Examples:
413+
---------
414+
>>> ctx = dfn.SessionContext()
415+
>>> df = ctx.from_pydict({"a": [[7, 8, 9]]})
416+
>>> result = df.select(
417+
... dfn.functions.list_join(dfn.col("a"), dfn.lit("|")).alias("s"))
418+
>>> result.collect_column("s")[0].as_py()
419+
'7|8|9'
345420
"""
346421
return array_to_string(expr, delimiter)
347422

348423

349424
def in_list(arg: Expr, values: list[Expr], negated: bool = False) -> Expr:
350-
"""Returns whether the argument is contained within the list ``values``."""
425+
"""Returns whether the argument is contained within the list ``values``.
426+
427+
Examples:
428+
---------
429+
>>> ctx = dfn.SessionContext()
430+
>>> df = ctx.from_pydict({"a": [1, 2, 3]})
431+
>>> result = df.select(
432+
... dfn.functions.in_list(dfn.col("a"), [dfn.lit(1), dfn.lit(3)]).alias("in"))
433+
>>> result.collect_column("in").to_pylist()
434+
[True, False, True]
435+
"""
351436
values = [v.expr for v in values]
352437
return Expr(f.in_list(arg.expr, values, negated))
353438

@@ -357,6 +442,15 @@ def digest(value: Expr, method: Expr) -> Expr:
357442
358443
Standard algorithms are md5, sha224, sha256, sha384, sha512, blake2s,
359444
blake2b, and blake3.
445+
446+
Examples:
447+
---------
448+
>>> ctx = dfn.SessionContext()
449+
>>> df = ctx.from_pydict({"a": ["hello"]})
450+
>>> result = df.select(
451+
... dfn.functions.digest(dfn.col("a"), dfn.lit("md5")).alias("d"))
452+
>>> len(result.collect_column("d")[0].as_py()) > 0
453+
True
360454
"""
361455
return Expr(f.digest(value.expr, method.expr))
362456

@@ -365,6 +459,14 @@ def concat(*args: Expr) -> Expr:
365459
"""Concatenates the text representations of all the arguments.
366460
367461
NULL arguments are ignored.
462+
463+
Examples:
464+
---------
465+
>>> ctx = dfn.SessionContext()
466+
>>> df = ctx.from_pydict({"a": ["hello"], "b": [" world"]})
467+
>>> result = df.select(dfn.functions.concat(dfn.col("a"), dfn.col("b")).alias("c"))
468+
>>> result.collect_column("c")[0].as_py()
469+
'hello world'
368470
"""
369471
args = [arg.expr for arg in args]
370472
return Expr(f.concat(args))
@@ -374,32 +476,61 @@ def concat_ws(separator: str, *args: Expr) -> Expr:
374476
"""Concatenates the list ``args`` with the separator.
375477
376478
``NULL`` arguments are ignored. ``separator`` should not be ``NULL``.
479+
480+
Examples:
481+
---------
482+
>>> ctx = dfn.SessionContext()
483+
>>> df = ctx.from_pydict({"a": ["hello"], "b": ["world"]})
484+
>>> result = df.select(
485+
... dfn.functions.concat_ws("-", dfn.col("a"), dfn.col("b")).alias("c"))
486+
>>> result.collect_column("c")[0].as_py()
487+
'hello-world'
377488
"""
378489
args = [arg.expr for arg in args]
379490
return Expr(f.concat_ws(separator, args))
380491

381492

382493
def order_by(expr: Expr, ascending: bool = True, nulls_first: bool = True) -> SortExpr:
383-
"""Creates a new sort expression."""
494+
"""Creates a new sort expression.
495+
496+
Examples:
497+
---------
498+
>>> sort_expr = dfn.functions.order_by(dfn.col("a"), ascending=False)
499+
>>> sort_expr.ascending()
500+
False
501+
"""
384502
return SortExpr(expr, ascending=ascending, nulls_first=nulls_first)
385503

386504

387505
def alias(expr: Expr, name: str, metadata: dict[str, str] | None = None) -> Expr:
388506
"""Creates an alias expression with an optional metadata dictionary.
389507
390-
Args:
508+
Parameters:
509+
-----------
391510
expr: The expression to alias
392511
name: The alias name
393512
metadata: Optional metadata to attach to the column
394513
395-
Returns:
396-
An expression with the given alias
514+
Examples:
515+
---------
516+
>>> ctx = dfn.SessionContext()
517+
>>> df = ctx.from_pydict({"a": [1, 2]})
518+
>>> df.select(dfn.functions.alias(dfn.col("a"), "b")).collect_column("b")[0].as_py()
519+
1
397520
"""
398521
return Expr(f.alias(expr.expr, name, metadata))
399522

400523

401524
def col(name: str) -> Expr:
402-
"""Creates a column reference expression."""
525+
"""Creates a column reference expression.
526+
527+
Examples:
528+
---------
529+
>>> ctx = dfn.SessionContext()
530+
>>> df = ctx.from_pydict({"a": [1, 2, 3]})
531+
>>> df.select(dfn.functions.col("a")).collect_column("a")[0].as_py()
532+
1
533+
"""
403534
return Expr(f.col(name))
404535

405536

@@ -413,6 +544,14 @@ def count_star(filter: Expr | None = None) -> Expr:
413544
414545
Args:
415546
filter: If provided, only count rows for which the filter is True
547+
548+
Examples:
549+
---------
550+
>>> ctx = dfn.SessionContext()
551+
>>> df = ctx.from_pydict({"a": [1, 2, 3]})
552+
>>> result = df.aggregate([], [dfn.functions.count_star().alias("cnt")])
553+
>>> result.collect_column("cnt")[0].as_py()
554+
3
416555
"""
417556
return count(Expr.literal(1), filter=filter)
418557

@@ -423,6 +562,16 @@ def case(expr: Expr) -> CaseBuilder:
423562
Create a :py:class:`~datafusion.expr.CaseBuilder` to match cases for the
424563
expression ``expr``. See :py:class:`~datafusion.expr.CaseBuilder` for
425564
detailed usage.
565+
566+
Examples:
567+
---------
568+
>>> ctx = dfn.SessionContext()
569+
>>> df = ctx.from_pydict({"a": [1, 2, 3]})
570+
>>> result = df.select(
571+
... dfn.functions.case(dfn.col("a")).when(dfn.lit(1),
572+
... dfn.lit("one")).otherwise(dfn.lit("other")).alias("c"))
573+
>>> result.collect_column("c")[0].as_py()
574+
'one'
426575
"""
427576
return CaseBuilder(f.case(expr.expr))
428577

@@ -433,6 +582,16 @@ def when(when: Expr, then: Expr) -> CaseBuilder:
433582
Create a :py:class:`~datafusion.expr.CaseBuilder` to match cases for the
434583
expression ``expr``. See :py:class:`~datafusion.expr.CaseBuilder` for
435584
detailed usage.
585+
586+
Examples:
587+
---------
588+
>>> ctx = dfn.SessionContext()
589+
>>> df = ctx.from_pydict({"a": [1, 2, 3]})
590+
>>> result = df.select(
591+
... dfn.functions.when(dfn.col("a") > dfn.lit(2),
592+
... dfn.lit("big")).otherwise(dfn.lit("small")).alias("c"))
593+
>>> result.collect_column("c")[2].as_py()
594+
'big'
436595
"""
437596
return CaseBuilder(f.when(when.expr, then.expr))
438597

0 commit comments

Comments
 (0)