Skip to content

Commit e524121

Browse files
ntjohnson1 and claude authored
Add docstring examples for Common utility functions (#1419)
* Add docstring examples for Common utility functions Add example usage to docstrings for Common utility functions to improve documentation. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * Don't add examples for aliases * Parameters back to args * Examples to google doc style --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent b9a958e commit e524121

File tree

1 file changed

+139
-11
lines changed

1 file changed

+139
-11
lines changed

python/datafusion/functions.py

Lines changed: 139 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -295,45 +295,90 @@
295295

296296

297297
def isnan(expr: Expr) -> Expr:
    """Returns true when a number is +NaN or -NaN, and false otherwise.

    Args:
        expr: The numeric expression to test.

    Returns:
        An expression wrapping the underlying ``isnan`` call.

    Examples:
        >>> ctx = dfn.SessionContext()
        >>> df = ctx.from_pydict({"a": [1.0, np.nan]})
        >>> result = df.select(dfn.functions.isnan(dfn.col("a")).alias("isnan"))
        >>> result.collect_column("isnan")[1].as_py()
        True
    """
    nan_check = f.isnan(expr.expr)
    return Expr(nan_check)
300308

301309

302310
def nullif(expr1: Expr, expr2: Expr) -> Expr:
    """Returns NULL when ``expr1`` equals ``expr2``; otherwise returns ``expr1``.

    This can be used to perform the inverse operation of the COALESCE expression.

    Args:
        expr1: The expression whose value is returned when the two differ.
        expr2: The expression compared against ``expr1``.

    Examples:
        >>> ctx = dfn.SessionContext()
        >>> df = ctx.from_pydict({"a": [1, 2], "b": [1, 3]})
        >>> result = df.select(
        ...     dfn.functions.nullif(dfn.col("a"), dfn.col("b")).alias("nullif")
        ... )
        >>> result.collect_column("nullif").to_pylist()
        [None, 2]
    """
    left, right = expr1.expr, expr2.expr
    return Expr(f.nullif(left, right))
308324

309325

310326
def encode(expr: Expr, encoding: Expr) -> Expr:
    """Encodes ``expr`` using ``encoding``. encoding can be base64 or hex.

    Args:
        expr: The expression whose value is encoded.
        encoding: Expression naming the encoding, e.g. ``lit("base64")``.

    Examples:
        The base64 result shown here carries no ``=`` padding.

        >>> ctx = dfn.SessionContext()
        >>> df = ctx.from_pydict({"a": ["hello"]})
        >>> result = df.select(
        ...     dfn.functions.encode(dfn.col("a"), dfn.lit("base64")).alias("enc")
        ... )
        >>> result.collect_column("enc")[0].as_py()
        'aGVsbG8'
    """
    payload, scheme = expr.expr, encoding.expr
    return Expr(f.encode(payload, scheme))
313338

314339

315340
def decode(expr: Expr, encoding: Expr) -> Expr:
    """Decodes ``expr`` using ``encoding``. encoding can be base64 or hex.

    Args:
        expr: The expression holding the encoded value.
        encoding: Expression naming the encoding, e.g. ``lit("base64")``.

    Examples:
        >>> ctx = dfn.SessionContext()
        >>> df = ctx.from_pydict({"a": ["aGVsbG8="]})
        >>> result = df.select(
        ...     dfn.functions.decode(dfn.col("a"), dfn.lit("base64")).alias("dec")
        ... )
        >>> result.collect_column("dec")[0].as_py()
        b'hello'
    """
    payload, scheme = expr.expr, encoding.expr
    return Expr(f.decode(payload, scheme))
318352

319353

320354
def array_to_string(expr: Expr, delimiter: Expr) -> Expr:
    """Converts each element to its text representation.

    Args:
        expr: The array expression to convert.
        delimiter: Expression for the separator placed between elements. It is
            cast to ``pa.string()`` before being passed on.

    Examples:
        >>> ctx = dfn.SessionContext()
        >>> df = ctx.from_pydict({"a": [[1, 2, 3]]})
        >>> result = df.select(
        ...     dfn.functions.array_to_string(dfn.col("a"), dfn.lit(",")).alias("s")
        ... )
        >>> result.collect_column("s")[0].as_py()
        '1,2,3'
    """
    array_expr = expr.expr
    # The delimiter is always handed over as a string-typed expression.
    text_delimiter = delimiter.expr.cast(pa.string())
    return Expr(f.array_to_string(array_expr, text_delimiter))
323366

324367

325368
def array_join(expr: Expr, delimiter: Expr) -> Expr:
    """Converts each element to its text representation.

    Args:
        expr: The array expression to convert.
        delimiter: Expression for the separator placed between elements.

    See Also:
        This is an alias for :py:func:`array_to_string`.
    """
    return array_to_string(expr=expr, delimiter=delimiter)
331375

332376

333377
def list_to_string(expr: Expr, delimiter: Expr) -> Expr:
    """Converts each element to its text representation.

    Args:
        expr: The list expression to convert.
        delimiter: Expression for the separator placed between elements.

    See Also:
        This is an alias for :py:func:`array_to_string`.
    """
    joined = array_to_string(expr, delimiter)
    return joined
339384

def list_join(expr: Expr, delimiter: Expr) -> Expr:
    """Converts each element to its text representation.

    Args:
        expr: The list expression to convert.
        delimiter: Expression for the separator placed between elements.

    See Also:
        This is an alias for :py:func:`array_to_string`.
    """
    return array_to_string(expr, delimiter)
347395

348396

349397
def in_list(arg: Expr, values: list[Expr], negated: bool = False) -> Expr:
    """Returns whether the argument is contained within the list ``values``.

    Args:
        arg: The expression to look for.
        values: The candidate expressions to compare ``arg`` against.
        negated: Flag forwarded unchanged to the underlying ``in_list`` call.

    Examples:
        >>> ctx = dfn.SessionContext()
        >>> df = ctx.from_pydict({"a": [1, 2, 3]})
        >>> result = df.select(
        ...     dfn.functions.in_list(
        ...         dfn.col("a"), [dfn.lit(1), dfn.lit(3)]
        ...     ).alias("in")
        ... )
        >>> result.collect_column("in").to_pylist()
        [True, False, True]
    """
    # Unwrap into a separate local so the ``values`` parameter keeps its type.
    raw_values = [candidate.expr for candidate in values]
    membership = f.in_list(arg.expr, raw_values, negated)
    return Expr(membership)
353413

@@ -357,6 +417,14 @@ def digest(value: Expr, method: Expr) -> Expr:
357417
358418
Standard algorithms are md5, sha224, sha256, sha384, sha512, blake2s,
359419
blake2b, and blake3.
420+
421+
Examples:
422+
>>> ctx = dfn.SessionContext()
423+
>>> df = ctx.from_pydict({"a": ["hello"]})
424+
>>> result = df.select(
425+
... dfn.functions.digest(dfn.col("a"), dfn.lit("md5")).alias("d"))
426+
>>> len(result.collect_column("d")[0].as_py()) > 0
427+
True
360428
"""
361429
return Expr(f.digest(value.expr, method.expr))
362430

def concat(*args: Expr) -> Expr:
    """Concatenates the text representations of all the arguments.

    NULL arguments are ignored.

    Args:
        *args: The expressions to concatenate, in order.

    Examples:
        >>> ctx = dfn.SessionContext()
        >>> df = ctx.from_pydict({"a": ["hello"], "b": [" world"]})
        >>> result = df.select(
        ...     dfn.functions.concat(dfn.col("a"), dfn.col("b")).alias("c")
        ... )
        >>> result.collect_column("c")[0].as_py()
        'hello world'
    """
    raw_parts = [part.expr for part in args]
    combined = f.concat(raw_parts)
    return Expr(combined)
def concat_ws(separator: str, *args: Expr) -> Expr:
    """Concatenates the list ``args`` with the separator.

    ``NULL`` arguments are ignored. ``separator`` should not be ``NULL``.

    Args:
        separator: The string placed between consecutive arguments.
        *args: The expressions to concatenate, in order.

    Examples:
        >>> ctx = dfn.SessionContext()
        >>> df = ctx.from_pydict({"a": ["hello"], "b": ["world"]})
        >>> result = df.select(
        ...     dfn.functions.concat_ws("-", dfn.col("a"), dfn.col("b")).alias("c")
        ... )
        >>> result.collect_column("c")[0].as_py()
        'hello-world'
    """
    raw_parts = [part.expr for part in args]
    combined = f.concat_ws(separator, raw_parts)
    return Expr(combined)
380465

381466

382467
def order_by(expr: Expr, ascending: bool = True, nulls_first: bool = True) -> SortExpr:
    """Creates a new sort expression.

    Args:
        expr: The expression to sort by.
        ascending: Passed to :py:class:`SortExpr` as ``ascending``.
        nulls_first: Passed to :py:class:`SortExpr` as ``nulls_first``.

    Examples:
        >>> sort_expr = dfn.functions.order_by(dfn.col("a"), ascending=False)
        >>> sort_expr.ascending()
        False
    """
    sort_expr = SortExpr(
        expr,
        ascending=ascending,
        nulls_first=nulls_first,
    )
    return sort_expr
385476

386477

@@ -392,14 +483,26 @@ def alias(expr: Expr, name: str, metadata: dict[str, str] | None = None) -> Expr
392483
name: The alias name
393484
metadata: Optional metadata to attach to the column
394485
395-
Returns:
396-
An expression with the given alias
486+
Examples:
487+
>>> ctx = dfn.SessionContext()
488+
>>> df = ctx.from_pydict({"a": [1, 2]})
489+
>>> df.select(
490+
... dfn.functions.alias(dfn.col("a"), "b")
491+
... ).collect_column("b")[0].as_py()
492+
1
397493
"""
398494
return Expr(f.alias(expr.expr, name, metadata))
399495

400496

401497
def col(name: str) -> Expr:
    """Creates a column reference expression.

    Args:
        name: The name of the column to reference.

    Examples:
        >>> ctx = dfn.SessionContext()
        >>> df = ctx.from_pydict({"a": [1, 2, 3]})
        >>> df.select(dfn.functions.col("a")).collect_column("a")[0].as_py()
        1
    """
    column_ref = f.col(name)
    return Expr(column_ref)
404507

405508

@@ -413,6 +516,13 @@ def count_star(filter: Expr | None = None) -> Expr:
413516
414517
Args:
415518
filter: If provided, only count rows for which the filter is True
519+
520+
Examples:
521+
>>> ctx = dfn.SessionContext()
522+
>>> df = ctx.from_pydict({"a": [1, 2, 3]})
523+
>>> result = df.aggregate([], [dfn.functions.count_star().alias("cnt")])
524+
>>> result.collect_column("cnt")[0].as_py()
525+
3
416526
"""
417527
return count(Expr.literal(1), filter=filter)
418528

@@ -423,6 +533,15 @@ def case(expr: Expr) -> CaseBuilder:
423533
Create a :py:class:`~datafusion.expr.CaseBuilder` to match cases for the
424534
expression ``expr``. See :py:class:`~datafusion.expr.CaseBuilder` for
425535
detailed usage.
536+
537+
Examples:
538+
>>> ctx = dfn.SessionContext()
539+
>>> df = ctx.from_pydict({"a": [1, 2, 3]})
540+
>>> result = df.select(
541+
... dfn.functions.case(dfn.col("a")).when(dfn.lit(1),
542+
... dfn.lit("one")).otherwise(dfn.lit("other")).alias("c"))
543+
>>> result.collect_column("c")[0].as_py()
544+
'one'
426545
"""
427546
return CaseBuilder(f.case(expr.expr))
428547

@@ -433,6 +552,15 @@ def when(when: Expr, then: Expr) -> CaseBuilder:
433552
Create a :py:class:`~datafusion.expr.CaseBuilder` to match cases for the
434553
expression ``expr``. See :py:class:`~datafusion.expr.CaseBuilder` for
435554
detailed usage.
555+
556+
Examples:
557+
>>> ctx = dfn.SessionContext()
558+
>>> df = ctx.from_pydict({"a": [1, 2, 3]})
559+
>>> result = df.select(
560+
... dfn.functions.when(dfn.col("a") > dfn.lit(2),
561+
... dfn.lit("big")).otherwise(dfn.lit("small")).alias("c"))
562+
>>> result.collect_column("c")[2].as_py()
563+
'big'
436564
"""
437565
return CaseBuilder(f.when(when.expr, then.expr))
438566

0 commit comments

Comments
 (0)