Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@
/docs
inst/doc
README_cache/
*.tar.gz
*.Rcheck/
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ S3method(auto_copy,duckplyr_df)
S3method(collect,duckplyr_df)
S3method(collect,prudent_duckplyr_df)
S3method(compute,duckplyr_df)
S3method(compute_csv,data.frame)
S3method(compute_csv,duckplyr_df)
S3method(compute_parquet,data.frame)
S3method(compute_parquet,duckplyr_df)
S3method(count,duckplyr_df)
S3method(cross_join,duckplyr_df)
S3method(distinct,duckplyr_df)
Expand Down
37 changes: 26 additions & 11 deletions R/compute_csv.R
Original file line number Diff line number Diff line change
@@ -1,19 +1,16 @@
#' Compute results to a CSV file
#'
#' For a duckplyr frame, this function executes the query
#' and stores the results in a CSV file,
#' without converting it to an R data frame.
#' This is a generic function that executes a query
#' and stores the results in a CSV file.
#' For a duckplyr frame, the materialization occurs outside of R.
#' The result is a duckplyr frame that can be used with subsequent dplyr verbs.
#' This function can also be used as a CSV writer for regular data frames.
#'
#' @inheritParams rlang::args_dots_empty
#' @inheritParams compute.duckplyr_df
#' @inheritParams compute_parquet
#' @param options A list of additional options to pass to create the storage format,
#' see <https://duckdb.org/docs/sql/statements/copy.html#csv-options>
#' for details.
#' @param x A data frame or lazy data frame.
#' @param path The path of the CSV file to create.
#' @param ... Additional arguments passed to methods.
#'
#' @return A duckplyr frame.
#' @return A data frame (the class may vary based on the input).
#'
#' @export
#' @examples
Expand All @@ -24,7 +21,18 @@
#' df <- compute_csv(df, path)
#' readLines(path)
#' @seealso [compute_parquet()], [compute.duckplyr_df()], [dplyr::collect()]
compute_csv <- function(x, path, ..., prudence = NULL, options = NULL) {
compute_csv <- function(x, path, ...) {
# S3 generic: dispatch on the class of `x`. Methods for `duckplyr_df` and
# `data.frame` are defined below; `path` and `...` are passed on to the
# selected method unchanged.
UseMethod("compute_csv")
}

#' @inheritParams compute.duckplyr_df
#' @param options A list of additional options to pass to create the storage format,
#' see <https://duckdb.org/docs/sql/statements/copy.html#csv-options>
#' for details.
#'
#' @rdname compute_csv
#' @export
compute_csv.duckplyr_df <- function(x, path, ..., prudence = NULL, options = NULL) {
check_dots_empty()

if (is.null(options)) {
Expand All @@ -46,3 +54,10 @@ compute_csv <- function(x, path, ..., prudence = NULL, options = NULL) {

read_csv_duckdb(path, prudence = prudence)
}

#' @rdname compute_csv
#' @export
compute_csv.data.frame <- function(x, path, ..., prudence = NULL, options = NULL) {
  # Promote the plain data frame to a duckplyr frame, then reuse the
  # duckplyr_df method. `...` is forwarded untouched; that method runs
  # check_dots_empty() on whatever arrives.
  duck_tbl <- as_duckdb_tibble(x)

  compute_csv.duckplyr_df(
    duck_tbl,
    path,
    ...,
    prudence = prudence,
    options = options
  )
}
36 changes: 25 additions & 11 deletions R/compute_parquet.R
Original file line number Diff line number Diff line change
@@ -1,20 +1,16 @@
#' Compute results to a Parquet file
#'
#' For a duckplyr frame, this function executes the query
#' and stores the results in a Parquet file,
#' without converting it to an R data frame.
#' This is a generic function that executes a query
#' and stores the results in a Parquet file.
#' For a duckplyr frame, the materialization occurs outside of R.
#' The result is a duckplyr frame that can be used with subsequent dplyr verbs.
#' This function can also be used as a Parquet writer for regular data frames.
#'
#' @inheritParams rlang::args_dots_empty
#' @inheritParams compute.duckplyr_df
#' @param x A duckplyr frame.
#' @param x A data frame or lazy data frame.
#' @param path The path of the Parquet file to create.
#' @param options A list of additional options to pass to create the Parquet file,
#' see <https://duckdb.org/docs/sql/statements/copy.html#parquet-options>
#' for details.
#' @param ... Additional arguments passed to methods.
#'
#' @return A duckplyr frame.
#' @return A data frame (the class may vary based on the input).
#'
#' @export
#' @examples
Expand All @@ -25,7 +21,18 @@
#' df <- compute_parquet(df, path)
#' explain(df)
#' @seealso [compute_csv()], [compute.duckplyr_df()], [dplyr::collect()]
compute_parquet <- function(x, path, ..., prudence = NULL, options = NULL) {
compute_parquet <- function(x, path, ...) {
# S3 generic: dispatch on the class of `x`. Methods for `duckplyr_df` and
# `data.frame` are defined below; `path` and `...` are passed on to the
# selected method unchanged.
UseMethod("compute_parquet")
}

#' @inheritParams compute.duckplyr_df
#' @param options A list of additional options to pass to create the Parquet file,
#' see <https://duckdb.org/docs/sql/statements/copy.html#parquet-options>
#' for details.
#'
#' @rdname compute_parquet
#' @export
compute_parquet.duckplyr_df <- function(x, path, ..., prudence = NULL, options = NULL) {
check_dots_empty()

if (is.null(options)) {
Expand All @@ -47,3 +54,10 @@ compute_parquet <- function(x, path, ..., prudence = NULL, options = NULL) {

read_parquet_duckdb(path, prudence = prudence)
}

#' @rdname compute_parquet
#' @export
compute_parquet.data.frame <- function(x, path, ..., prudence = NULL, options = NULL) {
  # Promote the plain data frame to a duckplyr frame, then reuse the
  # duckplyr_df method. `...` is forwarded untouched; that method runs
  # check_dots_empty() on whatever arrives.
  duck_tbl <- as_duckdb_tibble(x)

  compute_parquet.duckplyr_df(
    duck_tbl,
    path,
    ...,
    prudence = prudence,
    options = options
  )
}
23 changes: 14 additions & 9 deletions man/compute_csv.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 13 additions & 8 deletions man/compute_parquet.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions tests/testthat/test-compute_csv.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,19 @@ test_that("compute_csv() prudence", {
expect_true(is_prudent_duckplyr_df(out))
expect_identical(collect(out), as_tibble(df))
})

test_that("compute_csv() is a generic function", {
  expect_true(is.function(compute_csv))

  # methods() lists the S3 methods found for the generic. Match the method
  # names literally: treated as regular expressions, the dots in the names
  # would match any character and could accept an unrelated method name.
  registered <- methods("compute_csv")
  expect_true(any(grepl("compute_csv.duckplyr_df", registered, fixed = TRUE)))
  expect_true(any(grepl("compute_csv.data.frame", registered, fixed = TRUE)))
})

test_that("compute_csv() with duckplyr_df", {
  df <- duckdb_tibble(x = c(1, 2))

  # Write into the session temp directory instead of the working directory,
  # so the test never leaves files in the source tree and cannot collide with
  # another run. withr deletes the file when the test exits.
  path <- withr::local_tempfile(fileext = ".csv")
  out <- compute_csv(df, path = path)

  # The round trip through the CSV file must preserve the data ...
  expect_identical(collect(out), collect(df))
  # ... and the result must still be a duckplyr frame.
  expect_s3_class(out, "duckplyr_df")
})
16 changes: 16 additions & 0 deletions tests/testthat/test-compute_parquet.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,19 @@ test_that("compute_parquet() with options", {
expect_identical(out, as_duckdb_tibble(df))
expect_false(is_prudent_duckplyr_df(out))
})

test_that("compute_parquet() is a generic function", {
  expect_true(is.function(compute_parquet))

  # methods() lists the S3 methods found for the generic. Match the method
  # names literally: treated as regular expressions, the dots in the names
  # would match any character and could accept an unrelated method name.
  registered <- methods("compute_parquet")
  expect_true(
    any(grepl("compute_parquet.duckplyr_df", registered, fixed = TRUE))
  )
  expect_true(
    any(grepl("compute_parquet.data.frame", registered, fixed = TRUE))
  )
})

test_that("compute_parquet() with duckplyr_df", {
  df <- duckdb_tibble(x = c(1, 2))

  # Write into the session temp directory instead of the working directory,
  # so the test never leaves files in the source tree and cannot collide with
  # another run. withr deletes the file when the test exits.
  path <- withr::local_tempfile(fileext = ".parquet")
  out <- compute_parquet(df, path = path)

  # The round trip through the Parquet file must preserve the data ...
  expect_identical(collect(out), collect(df))
  # ... and the result must still be a duckplyr frame.
  expect_s3_class(out, "duckplyr_df")
})