diff --git a/.gitignore b/.gitignore index 55ec1f58c..5fe33f9d0 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ /docs inst/doc README_cache/ +*.tar.gz +*.Rcheck/ diff --git a/NAMESPACE b/NAMESPACE index 455b13a32..836a4a39a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -21,6 +21,10 @@ S3method(auto_copy,duckplyr_df) S3method(collect,duckplyr_df) S3method(collect,prudent_duckplyr_df) S3method(compute,duckplyr_df) +S3method(compute_csv,data.frame) +S3method(compute_csv,duckplyr_df) +S3method(compute_parquet,data.frame) +S3method(compute_parquet,duckplyr_df) S3method(count,duckplyr_df) S3method(cross_join,duckplyr_df) S3method(distinct,duckplyr_df) diff --git a/R/compute_csv.R b/R/compute_csv.R index f777dcabc..5775faf06 100644 --- a/R/compute_csv.R +++ b/R/compute_csv.R @@ -1,19 +1,16 @@ #' Compute results to a CSV file #' -#' For a duckplyr frame, this function executes the query -#' and stores the results in a CSV file, -#' without converting it to an R data frame. +#' This is a generic function that executes a query +#' and stores the results in a CSV file. +#' For a duckplyr frame, the materialization occurs outside of R. #' The result is a duckplyr frame that can be used with subsequent dplyr verbs. -#' This function can also be used as a CSV writer for regular data frames. #' #' @inheritParams rlang::args_dots_empty -#' @inheritParams compute.duckplyr_df -#' @inheritParams compute_parquet -#' @param options A list of additional options to pass to create the storage format, -#' see <https://duckdb.org/docs/sql/statements/copy.html#csv-options> -#' for details. +#' @param x A data frame or lazy data frame. +#' @param path The path of the CSV file to create. +#' @param ... Additional arguments passed to methods. #' -#' @return A duckplyr frame. +#' @return A data frame (the class may vary based on the input). 
#' #' @export #' @examples @@ -24,7 +21,18 @@ #' df <- compute_csv(df, path) #' readLines(path) #' @seealso [compute_parquet()], [compute.duckplyr_df()], [dplyr::collect()] -compute_csv <- function(x, path, ..., prudence = NULL, options = NULL) { +compute_csv <- function(x, path, ...) { + UseMethod("compute_csv") +} + +#' @inheritParams compute.duckplyr_df +#' @param options A list of additional options to pass to create the storage format, +#' see <https://duckdb.org/docs/sql/statements/copy.html#csv-options> +#' for details. +#' +#' @rdname compute_csv +#' @export +compute_csv.duckplyr_df <- function(x, path, ..., prudence = NULL, options = NULL) { check_dots_empty() if (is.null(options)) { @@ -46,3 +54,10 @@ compute_csv <- function(x, path, ..., prudence = NULL, options = NULL) { read_csv_duckdb(path, prudence = prudence) } + +#' @rdname compute_csv +#' @export +compute_csv.data.frame <- function(x, path, ..., prudence = NULL, options = NULL) { + x <- as_duckdb_tibble(x) + compute_csv.duckplyr_df(x, path, ..., prudence = prudence, options = options) +} diff --git a/R/compute_parquet.R b/R/compute_parquet.R index 228f556a1..94844e52a 100644 --- a/R/compute_parquet.R +++ b/R/compute_parquet.R @@ -1,20 +1,16 @@ #' Compute results to a Parquet file #' -#' For a duckplyr frame, this function executes the query -#' and stores the results in a Parquet file, -#' without converting it to an R data frame. +#' This is a generic function that executes a query +#' and stores the results in a Parquet file. +#' For a duckplyr frame, the materialization occurs outside of R. #' The result is a duckplyr frame that can be used with subsequent dplyr verbs. -#' This function can also be used as a Parquet writer for regular data frames. #' #' @inheritParams rlang::args_dots_empty -#' @inheritParams compute.duckplyr_df -#' @param x A duckplyr frame. +#' @param x A data frame or lazy data frame. #' @param path The path of the Parquet file to create. 
-#' @param options A list of additional options to pass to create the Parquet file, -#' see <https://duckdb.org/docs/sql/statements/copy.html#parquet-options> -#' for details. +#' @param ... Additional arguments passed to methods. #' -#' @return A duckplyr frame. +#' @return A data frame (the class may vary based on the input). #' #' @export #' @examples @@ -25,7 +21,18 @@ #' df <- compute_parquet(df, path) #' explain(df) #' @seealso [compute_csv()], [compute.duckplyr_df()], [dplyr::collect()] -compute_parquet <- function(x, path, ..., prudence = NULL, options = NULL) { +compute_parquet <- function(x, path, ...) { + UseMethod("compute_parquet") +} + +#' @inheritParams compute.duckplyr_df +#' @param options A list of additional options to pass to create the Parquet file, +#' see <https://duckdb.org/docs/sql/statements/copy.html#parquet-options> +#' for details. +#' +#' @rdname compute_parquet +#' @export +compute_parquet.duckplyr_df <- function(x, path, ..., prudence = NULL, options = NULL) { check_dots_empty() if (is.null(options)) { @@ -47,3 +54,10 @@ compute_parquet <- function(x, path, ..., prudence = NULL, options = NULL) { read_parquet_duckdb(path, prudence = prudence) } + +#' @rdname compute_parquet +#' @export +compute_parquet.data.frame <- function(x, path, ..., prudence = NULL, options = NULL) { + x <- as_duckdb_tibble(x) + compute_parquet.duckplyr_df(x, path, ..., prudence = prudence, options = options) +} diff --git a/man/compute_csv.Rd b/man/compute_csv.Rd index 9e7f0cc38..7a97526f4 100644 --- a/man/compute_csv.Rd +++ b/man/compute_csv.Rd @@ -2,16 +2,22 @@ % Please edit documentation in R/compute_csv.R \name{compute_csv} \alias{compute_csv} +\alias{compute_csv.duckplyr_df} +\alias{compute_csv.data.frame} \title{Compute results to a CSV file} \usage{ -compute_csv(x, path, ..., prudence = NULL, options = NULL) +compute_csv(x, path, ...) 
+ +\method{compute_csv}{duckplyr_df}(x, path, ..., prudence = NULL, options = NULL) + +\method{compute_csv}{data.frame}(x, path, ..., prudence = NULL, options = NULL) } \arguments{ -\item{x}{A duckplyr frame.} +\item{x}{A data frame or lazy data frame.} -\item{path}{The path of the Parquet file to create.} +\item{path}{The path of the CSV file to create.} -\item{...}{These dots are for future extensions and must be empty.} +\item{...}{Additional arguments passed to methods.} \item{prudence}{Memory protection, controls if DuckDB may convert intermediate results in DuckDB-managed memory to data frames in R memory. @@ -32,14 +38,13 @@ see \url{https://duckdb.org/docs/sql/statements/copy.html#csv-options} for details.} } \value{ -A duckplyr frame. +A data frame (the class may vary based on the input). } \description{ -For a duckplyr frame, this function executes the query -and stores the results in a CSV file, -without converting it to an R data frame. +This is a generic function that executes a query +and stores the results in a CSV file. +For a duckplyr frame, the materialization occurs outside of R. The result is a duckplyr frame that can be used with subsequent dplyr verbs. -This function can also be used as a CSV writer for regular data frames. } \examples{ library(duckplyr) diff --git a/man/compute_parquet.Rd b/man/compute_parquet.Rd index c6430d159..8da08e976 100644 --- a/man/compute_parquet.Rd +++ b/man/compute_parquet.Rd @@ -2,16 +2,22 @@ % Please edit documentation in R/compute_parquet.R \name{compute_parquet} \alias{compute_parquet} +\alias{compute_parquet.duckplyr_df} +\alias{compute_parquet.data.frame} \title{Compute results to a Parquet file} \usage{ -compute_parquet(x, path, ..., prudence = NULL, options = NULL) +compute_parquet(x, path, ...) 
+ +\method{compute_parquet}{duckplyr_df}(x, path, ..., prudence = NULL, options = NULL) + +\method{compute_parquet}{data.frame}(x, path, ..., prudence = NULL, options = NULL) } \arguments{ -\item{x}{A duckplyr frame.} +\item{x}{A data frame or lazy data frame.} \item{path}{The path of the Parquet file to create.} -\item{...}{These dots are for future extensions and must be empty.} +\item{...}{Additional arguments passed to methods.} \item{prudence}{Memory protection, controls if DuckDB may convert intermediate results in DuckDB-managed memory to data frames in R memory. @@ -32,14 +38,13 @@ see \url{https://duckdb.org/docs/sql/statements/copy.html#parquet-options} for details.} } \value{ -A duckplyr frame. +A data frame (the class may vary based on the input). } \description{ -For a duckplyr frame, this function executes the query -and stores the results in a Parquet file, -without converting it to an R data frame. +This is a generic function that executes a query +and stores the results in a Parquet file. +For a duckplyr frame, the materialization occurs outside of R. The result is a duckplyr frame that can be used with subsequent dplyr verbs. -This function can also be used as a Parquet writer for regular data frames. 
} \examples{ library(duckplyr) diff --git a/tests/testthat/test-compute_csv.R b/tests/testthat/test-compute_csv.R index 06d05d0fc..a7f1620bf 100644 --- a/tests/testthat/test-compute_csv.R +++ b/tests/testthat/test-compute_csv.R @@ -15,3 +15,19 @@ test_that("compute_csv() prudence", { expect_true(is_prudent_duckplyr_df(out)) expect_identical(collect(out), as_tibble(df)) }) + +test_that("compute_csv() is a generic function", { + expect_true(is.function(compute_csv)) + m <- methods("compute_csv") + expect_true(any(grepl("compute_csv.duckplyr_df", m))) + expect_true(any(grepl("compute_csv.data.frame", m))) +}) + +test_that("compute_csv() with duckplyr_df", { + df <- duckdb_tibble(x = c(1, 2)) + withr::defer(unlink("test_duck.csv")) + out <- compute_csv(df, path = "test_duck.csv") + + expect_identical(collect(out), collect(df)) + expect_true(inherits(out, "duckplyr_df")) +}) diff --git a/tests/testthat/test-compute_parquet.R b/tests/testthat/test-compute_parquet.R index 9eef99a76..8d64c3009 100644 --- a/tests/testthat/test-compute_parquet.R +++ b/tests/testthat/test-compute_parquet.R @@ -16,3 +16,19 @@ test_that("compute_parquet() with options", { expect_identical(out, as_duckdb_tibble(df)) expect_false(is_prudent_duckplyr_df(out)) }) + +test_that("compute_parquet() is a generic function", { + expect_true(is.function(compute_parquet)) + m <- methods("compute_parquet") + expect_true(any(grepl("compute_parquet.duckplyr_df", m))) + expect_true(any(grepl("compute_parquet.data.frame", m))) +}) + +test_that("compute_parquet() with duckplyr_df", { + df <- duckdb_tibble(x = c(1, 2)) + withr::defer(unlink("test_duck.parquet")) + out <- compute_parquet(df, path = "test_duck.parquet") + + expect_identical(collect(out), collect(df)) + expect_true(inherits(out, "duckplyr_df")) +})