Skip to content

Commit 811fc52

Browse files
authored
Merge pull request #80 from csoneson/duplicated-names
Fix checks for duplicated dimnames
2 parents ce574bd + 0e22047 commit 811fc52

File tree

3 files changed

+166
-9
lines changed

3 files changed

+166
-9
lines changed

R/print_method.R

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -134,12 +134,27 @@ print.SummarizedExperiment <- function(x, ..., n = NULL, width = NULL, n_extra =
134134
# Fix NOTEs
135135
. = NULL
136136

137+
# Stop if any column or row names are duplicated
138+
if (check_if_any_dimnames_duplicated(x, dim = "cols")) {
139+
stop("tidySummarizedExperiment says: some column names are duplicated")
140+
}
141+
if (check_if_any_dimnames_duplicated(x, dim = "rows")) {
142+
stop("tidySummarizedExperiment says: some row names are duplicated")
143+
}
137144
# Stop if column names of assays do not overlap
138-
if( check_if_assays_are_NOT_overlapped(x) )
139-
stop(
140-
"tidySummarizedExperiment says: the assays in your SummarizedExperiment have column names,
141-
but their order is not the same, and they not completely overlap."
142-
)
145+
if (check_if_assays_are_NOT_overlapped(x, dim = "cols")) {
146+
stop(
147+
"tidySummarizedExperiment says: the assays in your SummarizedExperiment have column names,
148+
but they do not completely overlap."
149+
)
150+
}
151+
if (check_if_assays_are_NOT_overlapped(x, dim = "rows")) {
152+
stop(
153+
"tidySummarizedExperiment says: the assays in your SummarizedExperiment have row names,
154+
but they do not completely overlap."
155+
)
156+
}
157+
143158

144159
# reorder assay colnames before printing
145160
# Rearrange if assays have colnames and rownames

R/utilities.R

Lines changed: 61 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -621,6 +621,14 @@ get_special_datasets <- function(se) {
621621
}
622622

623623
check_se_dimnames <- function(se) {
624+
# Stop if any column or row names are duplicated
625+
if (check_if_any_dimnames_duplicated(se, dim = "cols")) {
626+
stop("tidySummarizedExperiment says: some column names are duplicated")
627+
}
628+
if (check_if_any_dimnames_duplicated(se, dim = "rows")) {
629+
stop("tidySummarizedExperiment says: some row names are duplicated")
630+
}
631+
624632
# Stop if column names of assays do not overlap, or if some assays have
625633
# column names and others don't
626634
if (check_if_assays_are_NOT_overlapped(se, dim = "cols")) {
@@ -1269,6 +1277,51 @@ check_if_assays_are_NOT_consistently_ordered <- function(se) {
12691277
not()
12701278
}
12711279

1280+
# Check whether any row or column names are duplicated, either in the
# SummarizedExperiment object itself or in any of its assays.
#
# Arguments:
#   se  - the object to inspect; it is passed to assays() and to
#         rownames()/colnames().
#   dim - either "rows" or "cols" (default "cols"); selects which dimnames
#         are checked. Any other value triggers a stopifnot() error.
#
# Returns TRUE if at least one assay that carries names along the chosen
# dimension has a repeated name, or if the object's own names along that
# dimension contain a repeat; FALSE otherwise. Assays without names (NULL)
# and an object without names are never counted as duplicated.
check_if_any_dimnames_duplicated <- function(se, dim = "cols") {
    stopifnot(dim %in% c("rows", "cols"))
    dimnames_function <- if (dim == "rows") rownames else colnames

    # anyDuplicated() returns 0 when every element is distinct
    has_duplicates <- function(x) anyDuplicated(x) > 0L

    # Check assays
    # Extract the per-assay names once. withDimnames = FALSE is used so that
    # the names stored on each assay are inspected, rather than names
    # derived from the object (NOTE(review): per the SummarizedExperiment
    # documentation of assays(); confirm against the installed version).
    assay_dimnames <- assays(se, withDimnames = FALSE) |>
        as.list() |>
        lapply(dimnames_function)

    # Only assays that actually carry names can contain duplicates
    named_assay_dimnames <- Filter(Negate(is.null), assay_dimnames)

    # any() over an empty vector is FALSE, which covers both "no assays" and
    # "no assay has names"
    assays_check <- any(
        vapply(named_assay_dimnames, has_duplicates, logical(1))
    )

    # Check SE object
    se_dimnames <- dimnames_function(se)
    se_check <- !is.null(se_dimnames) && has_duplicates(se_dimnames)

    # Return TRUE if either of the two checks return TRUE
    assays_check || se_check
}
1324+
12721325
check_if_assays_are_NOT_overlapped <- function(se, dim = "cols") {
12731326
stopifnot(dim %in% c("rows", "cols"))
12741327
if (dim == "rows") {
@@ -1278,6 +1331,7 @@ check_if_assays_are_NOT_overlapped <- function(se, dim = "cols") {
12781331
dimnames_function <- colnames
12791332
length_function <- ncol
12801333
}
1334+
is_identical_for_reduce <- function(x,y) if (identical(x,y)) x else FALSE
12811335

12821336
# If I have any assay at all
12831337
assays(se) |> length() |> gt(0) &&
@@ -1294,13 +1348,16 @@ check_if_assays_are_NOT_overlapped <- function(se, dim = "cols") {
12941348

12951349
# If I have lack of consistency
12961350
# This will also be TRUE if some assays have dimnames and others don't
1351+
# For each assay, sort the dimnames, then check that they are all the
1352+
# same. Can't check for unique length, since some names may be repeated.
1353+
# If they're not all the same, the reduce() step will return FALSE;
1354+
# otherwise, returns the (shared) dimnames
12971355
assays(se, withDimnames = FALSE) |>
12981356
as.list() |>
12991357
map(dimnames_function) |>
1300-
reduce(intersect) |>
1301-
length() |>
1302-
equals(length_function(se)) |>
1303-
not()
1358+
map(sort) |>
1359+
reduce(is_identical_for_reduce) |>
1360+
is.logical()
13041361
}
13051362

13061363

tests/testthat/test-utilities.R

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,91 @@ test_that("get_count_datasets works", {
247247
expect_equal(cds$mat2, seq(10, 18))
248248
expect_equal(cds$mat3, seq(19, 27))
249249

250+
# SE does not have dimnames, one assay has duplicated colnames, one has no colnames
251+
se1 <- se
252+
rownames(se1) <- colnames(se1) <- NULL
253+
colnames(assay(se1, "mat1", withDimnames = FALSE))[2] <-
254+
colnames(assay(se1, "mat1", withDimnames = FALSE))[1]
255+
colnames(assay(se1, "mat2", withDimnames = FALSE)) <- NULL
256+
expect_equal(colnames(assay(se1, "mat1", withDimnames = FALSE)), paste0("S", c(1, 1, 3)))
257+
expect_equal(rownames(assay(se1, "mat1", withDimnames = FALSE)), paste0("G", seq_len(3)))
258+
expect_null(colnames(assay(se1, "mat2", withDimnames = FALSE)))
259+
expect_equal(colnames(assay(se1, "mat3", withDimnames = FALSE)), paste0("S", seq_len(3)))
260+
expect_null(colnames(se1))
261+
expect_null(rownames(se1))
262+
expect_error(cds <- get_count_datasets(se1), "some column names are duplicated")
263+
264+
# SE does not have dimnames, one assay has duplicated rownames, one has no rownames
265+
se1 <- se
266+
rownames(se1) <- colnames(se1) <- NULL
267+
rownames(assay(se1, "mat1", withDimnames = FALSE))[2:3] <-
268+
rownames(assay(se1, "mat1", withDimnames = FALSE))[1]
269+
rownames(assay(se1, "mat2", withDimnames = FALSE)) <- NULL
270+
expect_equal(rownames(assay(se1, "mat1", withDimnames = FALSE)), paste0("G", c(1, 1, 1)))
271+
expect_equal(colnames(assay(se1, "mat1", withDimnames = FALSE)), paste0("S", seq_len(3)))
272+
expect_null(rownames(assay(se1, "mat2", withDimnames = FALSE)))
273+
expect_equal(rownames(assay(se1, "mat3", withDimnames = FALSE)), paste0("G", seq_len(3)))
274+
expect_null(colnames(se1))
275+
expect_null(rownames(se1))
276+
expect_error(cds <- get_count_datasets(se1), "some row names are duplicated")
277+
278+
# SE has duplicated colnames
279+
se1 <- se
280+
colnames(se1) <- paste0("S", c(1, 1, 1))
281+
expect_error(cds <- get_count_datasets(se1), "some column names are duplicated")
282+
expect_true(check_if_any_dimnames_duplicated(se1, dim = "cols"))
283+
expect_false(check_if_any_dimnames_duplicated(se1, dim = "rows"))
284+
285+
# SE has duplicated rownames
286+
se1 <- se
287+
rownames(se1) <- paste0("G", c(1, 2, 1))
288+
expect_error(cds <- get_count_datasets(se1), "some row names are duplicated")
289+
expect_false(check_if_any_dimnames_duplicated(se1, dim = "cols"))
290+
expect_true(check_if_any_dimnames_duplicated(se1, dim = "rows"))
291+
292+
# All assays + SE have duplicated colnames
293+
se1 <- se
294+
colnames(se1)[2] <-
295+
colnames(assay(se1, "mat1", withDimnames = FALSE))[2] <-
296+
colnames(assay(se1, "mat2", withDimnames = FALSE))[2] <-
297+
colnames(assay(se1, "mat3", withDimnames = FALSE))[2] <- "S1"
298+
expect_true(check_if_any_dimnames_duplicated(se1, dim = "cols"))
299+
expect_false(check_if_any_dimnames_duplicated(se1, dim = "rows"))
300+
expect_false(check_if_assays_are_NOT_overlapped(se1, dim = "cols"))
301+
expect_false(check_if_assays_are_NOT_overlapped(se1, dim = "rows"))
302+
303+
# Two assays + SE have duplicated colnames
304+
se1 <- se
305+
colnames(se1)[2] <-
306+
colnames(assay(se1, "mat1", withDimnames = FALSE))[2] <-
307+
colnames(assay(se1, "mat3", withDimnames = FALSE))[2] <- "S1"
308+
expect_true(check_if_any_dimnames_duplicated(se1, dim = "cols"))
309+
expect_false(check_if_any_dimnames_duplicated(se1, dim = "rows"))
310+
expect_true(check_if_assays_are_NOT_overlapped(se1, dim = "cols"))
311+
expect_false(check_if_assays_are_NOT_overlapped(se1, dim = "rows"))
312+
313+
# Assays have duplicated colnames in different ways
314+
se1 <- se
315+
assay(se1, "mat2") <- NULL
316+
colnames(assay(se1, "mat1", withDimnames = FALSE)) <- c("S1", "S1", "S2")
317+
colnames(assay(se1, "mat3", withDimnames = FALSE)) <- c("S1", "S2", "S2")
318+
expect_true(check_if_any_dimnames_duplicated(se1, dim = "cols"))
319+
expect_false(check_if_any_dimnames_duplicated(se1, dim = "rows"))
320+
expect_true(check_if_assays_are_NOT_overlapped(se1, dim = "cols"))
321+
expect_false(check_if_assays_are_NOT_overlapped(se1, dim = "rows"))
322+
323+
# All dimnames are NULL - not duplicated
324+
se1 <- se
325+
rownames(se1) <- colnames(se1) <- NULL
326+
rownames(assay(se1, "mat1", withDimnames = FALSE)) <-
327+
colnames(assay(se1, "mat1", withDimnames = FALSE)) <-
328+
rownames(assay(se1, "mat2", withDimnames = FALSE)) <-
329+
colnames(assay(se1, "mat2", withDimnames = FALSE)) <-
330+
rownames(assay(se1, "mat3", withDimnames = FALSE)) <-
331+
colnames(assay(se1, "mat3", withDimnames = FALSE)) <- NULL
332+
expect_false(check_if_any_dimnames_duplicated(se1, dim = "cols"))
333+
expect_false(check_if_any_dimnames_duplicated(se1, dim = "rows"))
334+
250335
# Unnamed assay(s)
251336
# se1 <- SummarizedExperiment::SummarizedExperiment(
252337
# assays = list(

0 commit comments

Comments
 (0)