-
Notifications
You must be signed in to change notification settings - Fork 23
Open
Description
Continuing from #866: when date/datetime data is stored with nullable = TRUE and then read back as an Arrow object, the date/datetime columns come back as all null (the float64 column is read correctly). See below.
Reprex
Create schema
library(tiledb)
uri <- tempfile()
domain <- tiledb_domain(tiledb_dim("row", c(0L, 100L), 100L, "INT32"))
attrib <- c(tiledb_attr("date", type = "DATETIME_DAY", nullable = TRUE),
tiledb_attr("datetime", type = "DATETIME_MS", nullable = TRUE),
tiledb_attr("nanosecs", type = "DATETIME_NS", nullable = TRUE),
tiledb_attr("float64", type = "FLOAT64", nullable = TRUE))
schema <- tiledb_array_schema(domain, attrib, sparse=TRUE)
res <- tiledb_array_create(uri, schema)
Store data and read back as data.frame
df <- data.frame(row = 1:2,
date = c(as.Date("1990-01-01"), as.Date(NA)),
datetime = c(as.POSIXct("1990-01-01"), as.POSIXct(NA)),
nanosecs = nanotime::as.nanotime(c(100, NA)),
float64 = c(1, NA))
# Save data and read back as data.table
arr <- tiledb_array(uri, return_as="data.table")
arr[] <- df
arr[]
#> row date datetime nanosecs float64
#> <int> <Date> <POSc> <nanotime> <num>
#> 1: 1 1990-01-01 1990-01-01 00:00:00 1970-01-01T00:00:00.000000100+00:00 1
#> 2: 2 1970-01-01 -292275055-05-16 18:21:56 <NA> NA
Read back as arrow
# Now read back as arrow table
arr <- tiledb_array(uri, return_as = "arrow")
# print
arr[]
#> Table
#> 2 rows x 5 columns
#> $row <int32 not null>
#> $date <date32[day]>
#> $datetime <timestamp[ms]>
#> $nanosecs <timestamp[ns]>
#> $float64 <double>
# Convert to data.frame
data.table::as.data.table(arr[])
#> row date datetime nanosecs float64
#> <int> <Date> <POSc> <POSc> <num>
#> 1: 1 <NA> <NA> <NA> 1
#> 2: 2 <NA> <NA> <NA> NA
# or equivalent conversion
arr[]$to_data_frame()
#> row date datetime nanosecs float64
#> 1 1 <NA> <NA> <NA> 1
#> 2 2 <NA> <NA> <NA> NA
# hmm...
arr[][["date"]]
#> ChunkedArray
#> <date32[day]>
#> [
#> [
#> null,
#> null
#> ]
#> ]
With nullable = FALSE, it works as expected
library(tiledb)
uri <- tempfile()
domain <- tiledb_domain(tiledb_dim("row", c(0L, 100L), 100L, "INT32"))
attrib <- c(tiledb_attr("date", type = "DATETIME_DAY", nullable = FALSE),
tiledb_attr("datetime", type = "DATETIME_MS", nullable = FALSE),
tiledb_attr("nanosecs", type = "DATETIME_NS", nullable = FALSE),
tiledb_attr("float64", type = "FLOAT64", nullable = FALSE))
schema <- tiledb_array_schema(domain, attrib, sparse=TRUE)
res <- tiledb_array_create(uri, schema)
df <- data.frame(row = 1:2,
date = c(as.Date("1990-01-01"), as.Date(NA)),
datetime = c(as.POSIXct("1990-01-01"), as.POSIXct(NA)),
nanosecs = nanotime::as.nanotime(c(100, NA)),
float64 = c(1, NA))
# Save data and read back as data.table
arr <- tiledb_array(uri, return_as="data.table")
arr[] <- df
arr[]
#> row date datetime
#> <int> <Date> <POSc>
#> 1: 1 1990-01-01 1990-01-01 00:00:00
#> 2: 2 1970-01-01 -292275055-05-16 18:21:56
#> nanosecs float64
#> <nanotime> <num>
#> 1: 1970-01-01T00:00:00.000000100+00:00 1
#> 2: <NA> NA
# Now read back as arrow table
arr <- tiledb_array(uri, return_as = "arrow")
# print
arr[]
#> Table
#> 2 rows x 5 columns
#> $row <int32 not null>
#> $date <date32[day] not null>
#> $datetime <timestamp[ms] not null>
#> $nanosecs <timestamp[ns] not null>
#> $float64 <double not null>
# Convert to data.frame
data.table::as.data.table(arr[])
#> row date datetime nanosecs float64
#> <int> <Date> <POSc> <POSc> <num>
#> 1: 1 1990-01-01 1990-01-01 00:00:00 1970-01-01 02:00:00 1
#> 2: 2 1970-01-01 -292275055-05-16 18:21:56 1677-09-21 01:47:35 NA
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels