From 4c38f6df55e847492481034609a41b5502ff27d9 Mon Sep 17 00:00:00 2001 From: ahl27 Date: Thu, 30 Jan 2025 11:27:16 -0500 Subject: [PATCH 1/9] Adds first draft of MIndexList class, will be used to store vmatchPDict output --- DESCRIPTION | 1 + NAMESPACE | 2 +- R/MIndexList-class.R | 76 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 R/MIndexList-class.R diff --git a/DESCRIPTION b/DESCRIPTION index babed09f..19218783 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -78,6 +78,7 @@ Collate: utils.R misc.R SparseList-class.R MIndex-class.R + MIndexList-class.R lowlevel-matching.R match-utils.R matchPattern.R diff --git a/NAMESPACE b/NAMESPACE index 7830efa9..d7c1e3d8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -213,7 +213,7 @@ exportMethods( exportClasses( #SparseList, - MIndex, ByPos_MIndex, + MIndex, ByPos_MIndex, MIndexList, PreprocessedTB, Twobit, ACtree2, PDict3Parts, PDict, TB_PDict, MTB_PDict, Expanded_TB_PDict diff --git a/R/MIndexList-class.R b/R/MIndexList-class.R new file mode 100644 index 00000000..f96dac2b --- /dev/null +++ b/R/MIndexList-class.R @@ -0,0 +1,76 @@ +### ========================================================================= +### MIndexList objects +### ------------------------------------------------------------------------- +### + +### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +### The "MIndexList" class. +### +### This class serves as a base class to hold collections of MIndex objects. +### +### As with MIndex, in normal operations, the user should never need to create +### MIndex objects directly or to modify existing ones. Those objects are +### typically returned by a sequence matching/alignment function like +### vmatchPattern() or matchPDict(). For this reason, coercion methods are +### relatively barebones and will throw errors rather than try to correct +### + +setClass("MIndexList", + contains="CompressedList", + representation( +# "VIRTUAL", + unlistData="MIndex" + ), + prototype( + elementType="MIndex" + ) +) + + +.from_list_to_MIndexList <- function(from) +{ + ## keeping ans_class variable for potential future extensions + ans_class <- "MIndexList" + + ## ensure elements are all MIndex objects + if(!all(vapply(from, inherits, logical(1L), what="MIndex"))) + stop("Only lists of MIndex objects can be coerced to MIndexList") + IRanges:::new_CompressedList_from_list(ans_class, from) +} + +## barebones, but users shouldn't be calling this directly +## expecting to be used internally, list coercion provided for QoL +setAs("list", "MIndexList", function(from) .from_list_to_MIndexList(from)) +setAs("List", "MIndexList", function(from) .from_list_to_MIndexList(as.list(from))) + +.from_MIndexList_to_compact_char <- function(object){ + lapply(object, \(m_ind){ + paste(length(m_ind), "patterns,", + sum(lengths(m_ind)), "matches") + }) +} + +setMethod("show", "MIndexList", + function(object) { + lo <- length(object) + k <- min(5, length(object)) + diffK <- lo - 5 + cat(classNameForDisplay(object), " of length ", lo, + "\n", sep = "") + + repr <- .from_MIndexList_to_compact_char(head(object, k)) + IRanges:::.showAtomicList(CharacterList(repr), minLines=10L) + if (diffK > 0) + cat("...\n<", diffK, + ifelse(diffK == 1, + " more element>\n", " more elements>\n"), + sep="") + } +) + +setMethod("showAsCell", "MIndexList", + function(object){ + repr <- .from_MIndexList_to_compact_char(object) + showAsCell(CharacterList(repr)) + } +) From f3f2f25bad5eebad4c04f9b0cdf77faeb75c690c Mon Sep 17 00:00:00 2001 From: ahl27 Date: Thu, 13 Feb 2025 16:23:29 -0500 Subject: [PATCH 2/9] Adds MIndexList class; enables vmatchPDict for PDict,XStringSet comparison (XStringSet,XStringSet still not working --- R/MIndexList-class.R | 4 ++-- R/matchPDict.R | 21 ++++++++++++++------- src/match_pdict.c | 37 ++++++++++++++++++++++++++++++++----- 3 files changed, 48 insertions(+), 14 deletions(-) diff --git a/R/MIndexList-class.R b/R/MIndexList-class.R index f96dac2b..89b04438 100644 --- a/R/MIndexList-class.R +++ b/R/MIndexList-class.R @@ -11,8 +11,8 @@ ### As with MIndex, in normal operations, the user should never need to create ### MIndex objects directly or to modify existing ones. Those objects are ### typically returned by a sequence matching/alignment function like -### vmatchPattern() or matchPDict(). For this reason, coercion methods are -### relatively barebones and will throw errors rather than try to correct +### vmatchPDict(). For this reason, coercion methods are relatively barebones +### and will throw errors rather than try to correct ### setClass("MIndexList", diff --git a/R/matchPDict.R b/R/matchPDict.R index c7324abe..b1434f32 100644 --- a/R/matchPDict.R +++ b/R/matchPDict.R @@ -561,10 +561,8 @@ if (!identical(weight, 1L)) warning("'weight' is ignored when 'collapse=FALSE'") } - } else { - ## vmatchPDict() - stop("vmatchPDict() is not supported yet, sorry") } + ## We are doing our own dispatch here, based on the type of 'pdict'. ## TODO: Revisit this. Would probably be a better design to use a ## generic/methods approach and rely on the standard dispatch mechanism. @@ -599,8 +597,14 @@ } return(ans) } + ## vmatchPDict() - stop("vmatchPDict() is not supported yet, sorry") + ## converting all the results to MIndex objects and then wrapping the result + ans_width0 <- rep.int(length(pdict), length(subject)) + for(i in seq_along(ans)) + ans[[i]] <- new("ByPos_MIndex", width0=ans_width0, NAMES=names(pdict), ends=ans[[i]]) + names(ans) <- names(subject) + ans <- as(ans, "MIndexList") return(ans) } @@ -771,8 +775,8 @@ setMethod("whichPDict", "MaskedXString", ### 'lapply(subject, function(s) whichPDict(pdict, s, ...))'. ### The returned object is a list of length N. ### o vcountPDict(): returns an M x N matrix of integers. -### o vmatchPDict(): not supported yet! (first we need a container to -### store the results) +### o vmatchPDict(): returns an MIndexList (CompressedList of MIndex objects) +### This contains N MIndex objects, each with up to M entries ### setGeneric("vmatchPDict", signature="subject", @@ -787,7 +791,10 @@ setMethod("vmatchPDict", "ANY", function(pdict, subject, max.mismatch=0, min.mismatch=0, with.indels=FALSE, fixed=TRUE, algorithm="auto", verbose=FALSE) - stop("vmatchPDict() is not ready yet, sorry") + .vmatchPDict(pdict, subject, + max.mismatch, min.mismatch, with.indels, fixed, + algorithm, collapse=FALSE, weight=1L, + verbose, matches.as="MATCHES_AS_ENDS") ) ### Dispatch on 'subject' (see signature of generic). diff --git a/src/match_pdict.c b/src/match_pdict.c index 6244e0b1..742c0435 100644 --- a/src/match_pdict.c +++ b/src/match_pdict.c @@ -480,6 +480,28 @@ static SEXP vcount_XStringSet_XStringSet(SEXP pattern, return ans; } +SEXP vmatch_PDict3Parts_XStringSet_helper(SEXP pptb, HeadTail *headtail, MatchPDictBuf *mpdb, + SEXP xstringset, + SEXP max_mismatch, SEXP min_mismatch, + SEXP fixed){ + XStringSet_holder subject; + int subj_length; + Chars_holder subj_elt; + + subject = _hold_XStringSet(xstringset); + subj_length = _get_length_from_XStringSet_holder(&subject); + + SEXP RETURN_LIST = PROTECT(NEW_LIST(subj_length)); + for(int i=0; imatches), R_NilValue)); + _MatchPDictBuf_flush(mpdb); + } + UNPROTECT(1); + return RETURN_LIST; +} + /* --- .Call ENTRY POINT --- */ SEXP vmatch_PDict3Parts_XStringSet(SEXP pptb, SEXP pdict_head, SEXP pdict_tail, SEXP subject, @@ -495,10 +517,12 @@ SEXP vmatch_PDict3Parts_XStringSet(SEXP pptb, SEXP pdict_head, SEXP pdict_tail, matchpdict_buf = new_MatchPDictBuf_from_PDict3Parts(matches_as, pptb, pdict_head, pdict_tail); switch (matchpdict_buf.matches.ms_code) { - case MATCHES_AS_NULL: - error("vmatch_PDict3Parts_XStringSet() does not support " - "'matches_as=\"%s\"' yet, sorry", - CHAR(STRING_ELT(matches_as, 0))); + case MATCHES_AS_RANGES: + case MATCHES_AS_ENDS: + return vmatch_PDict3Parts_XStringSet_helper(pptb, &headtail, + &matchpdict_buf, subject, + max_mismatch, min_mismatch, + fixed); case MATCHES_AS_WHICH: return vwhich_PDict3Parts_XStringSet(pptb, &headtail, subject, @@ -510,8 +534,11 @@ SEXP vmatch_PDict3Parts_XStringSet(SEXP pptb, SEXP pdict_head, SEXP pdict_tail, max_mismatch, min_mismatch, fixed, collapse, weight, &matchpdict_buf); + default: + error("vmatch_PDict3Parts_XStringSet() does not support " + "'matches_as=\"%s\"' yet, sorry", + CHAR(STRING_ELT(matches_as, 0))); } - error("vmatchPDict() is not supported yet, sorry"); return R_NilValue; } From e97b468981040e2df01a702fadd2a7e101feda5e Mon Sep 17 00:00:00 2001 From: ahl27 <30053966+ahl27@users.noreply.github.com> Date: Wed, 28 Jan 2026 17:22:19 -0500 Subject: [PATCH 3/9] Fix bug in vmatchPDict that reported incorrect widths --- R/matchPDict.R | 12 ++++++------ tests/testthat/test-matchPDict.R | 3 ++- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/R/matchPDict.R b/R/matchPDict.R index b1434f32..dcf5865b 100644 --- a/R/matchPDict.R +++ b/R/matchPDict.R @@ -15,7 +15,7 @@ ### > library(BSgenome.Hsapiens.UCSC.hg18) ### > chr1 <- Hsapiens$chr1 ### > system.time(end_index <- endIndex(matchPDict(pdict, chr1))) -### user system elapsed +### user system elapsed ### 50.663 0.000 50.763 ### > count_index <- sapply(end_index, length) ### > table(count_index) @@ -64,8 +64,8 @@ ### 1M 36 2.5 sec 717M 106 sec 103 sec 0 ### 10M 36 56 sec 6724M 351 sec 200 sec 0 ### 10M 12 7.5 sec 340M 227 sec 216 sec 100M -### 30M 12 27 sec 523M 491 sec ? -### +### 30M 12 27 sec 523M 491 sec ? +### ### III. Inexact matching ### --------------------- ### pdict <- PDict(c("acgt", "gt", "cgt", "ac"), tb.end=2) @@ -582,7 +582,8 @@ max.mismatch, min.mismatch, with.indels, fixed, algorithm, collapse, weight, verbose, matches.as) - if (is.null(which_pp_excluded)) + if (is.null(which_pp_excluded) && matches.as %in% + c("MATCHES_AS_WHICH", "MATCHES_AS_COUNTS")) return(ans) if (matches.as == "MATCHES_AS_WHICH") { ## vwhichPDict() @@ -600,9 +601,8 @@ ## vmatchPDict() ## converting all the results to MIndex objects and then wrapping the result - ans_width0 <- rep.int(length(pdict), length(subject)) for(i in seq_along(ans)) - ans[[i]] <- new("ByPos_MIndex", width0=ans_width0, NAMES=names(pdict), ends=ans[[i]]) + ans[[i]] <- new("ByPos_MIndex", width0=width(pdict), NAMES=names(pdict), ends=ans[[i]]) names(ans) <- names(subject) ans <- as(ans, "MIndexList") return(ans) diff --git a/tests/testthat/test-matchPDict.R b/tests/testthat/test-matchPDict.R index 722b7372..c6193eb8 100644 --- a/tests/testthat/test-matchPDict.R +++ b/tests/testthat/test-matchPDict.R @@ -79,7 +79,8 @@ test_that("vcount/vwhich/vmatchPDict() work", { ## canary tests for future changes pdict <- PDict(c("acgt", "gt", "cgt", "ac"), tb.end=2) d <- DNAString("acggaccg") - dss <- DNAStringSet(list(d,d)) + d2 <- DNAString("gtcgtacgt") + dss <- DNAStringSet(list(d,d2)) dvv <- Views(d, start=rep(1,2), width=rep(length(d), 2)) # Disallowed cases From 738005b152cdbcf7b3505d97e8cdf5d046e0cac3 Mon Sep 17 00:00:00 2001 From: ahl27 <30053966+ahl27@users.noreply.github.com> Date: Thu, 29 Jan 2026 17:29:04 -0500 Subject: [PATCH 4/9] Add tests to verify vmatchPDict behavior is as expected --- tests/testthat/test-matchPDict.R | 44 +++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/tests/testthat/test-matchPDict.R b/tests/testthat/test-matchPDict.R index c6193eb8..43f5925a 100644 --- a/tests/testthat/test-matchPDict.R +++ b/tests/testthat/test-matchPDict.R @@ -75,12 +75,11 @@ test_that("inexact PDict matching works", { expect_equal(countPDict(pdict, d, max.mismatch=2), c(2,0,2,2)) }) -test_that("vcount/vwhich/vmatchPDict() work", { +test_that("vcount/vwhich/vmatchPDict work", { ## canary tests for future changes pdict <- PDict(c("acgt", "gt", "cgt", "ac"), tb.end=2) d <- DNAString("acggaccg") - d2 <- DNAString("gtcgtacgt") - dss <- DNAStringSet(list(d,d2)) + dss <- DNAStringSet(list(d,d)) dvv <- Views(d, start=rep(1,2), width=rep(length(d), 2)) # Disallowed cases @@ -117,10 +116,6 @@ test_that("vcount/vwhich/vmatchPDict() work", { expect_equal(vcountPDict(pdict, dvv, max.mismatch=2), exp_out) expect_equal(vwhichPDict(pdict, dvv, max.mismatch=2), list(c(1,3,4),c(1,3,4))) - - ## this will fail when we implement it to remind me to add tests for it - expect_error2(vmatchPDict(pdict, dss), - "vmatchPDict\\(\\) is not ready yet") }) test_that("MIndex functionality works", { @@ -234,3 +229,38 @@ test_that("matchPDict() works for variable width dictionary", { } }) +test_that("vmatchPDict is equivalent to matchPDict output", { + pdict <- PDict(c("acgt", "gt", "cgt", "ac"), tb.end=2) + d <- DNAString("acggaccg") + d2 <- DNAString("gtcgtacgt") + dss <- DNAStringSet(list(d,d2)) + + mindex_obj_comp <- function(m1, m2){ + if(!is(m1, "MIndex") || !is(m2, "MIndex")){ + return(FALSE) + } + + if(length(m1) != length(m1)){ + return (FALSE) + } + + for(i in seq_along(m1)){ + l1 <- m1[[i]] + l2 <- m2[[i]] + for(attr_name in c("start", "width")){ + if(!identical(attr(l1, attr_name), attr(l2, attr_name))){ + return(FALSE) + } + } + } + return(TRUE) + } + + vmatch_output <- vmatchPDict(pdict, dss) + + match1_output <- matchPDict(pdict, dss[[1]]) + match2_output <- matchPDict(pdict, dss[[2]]) + + expect_true(mindex_obj_comp(vmatch_output[[1]], match1_output)) + expect_true(mindex_obj_comp(vmatch_output[[2]], match2_output)) +}) From 1ed6e508fbe0ad1ba362cc7a4e01b9eef1ed485c Mon Sep 17 00:00:00 2001 From: ahl27 <30053966+ahl27@users.noreply.github.com> Date: Thu, 29 Jan 2026 17:43:34 -0500 Subject: [PATCH 5/9] Add vmatchPDict entry to man page --- man/matchPDict-exact.Rd | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/man/matchPDict-exact.Rd b/man/matchPDict-exact.Rd index 983bcec5..482dd409 100644 --- a/man/matchPDict-exact.Rd +++ b/man/matchPDict-exact.Rd @@ -55,9 +55,10 @@ and \code{whichPDict} returns the "who" information i.e. which patterns in the input dictionary have at least one match. - \code{vcountPDict} and \code{vwhichPDict} are vectorized versions - of \code{countPDict} and \code{whichPDict}, respectively, that is, - they work on a set of reference sequences in a vectorized fashion. + \code{vcountPDict}, \code{vwhichPDict}, and \code{vmatchPDict} are vectorized + versions of \code{countPDict}, \code{whichPDict}, and \code{matchPDict} + respectively, that is, they work on a set of reference sequences in a + vectorized fashion. This man page shows how to use these functions (aka the \code{*PDict} functions) for exact matching of a constant width dictionary i.e. @@ -86,6 +87,9 @@ vcountPDict(pdict, subject, vwhichPDict(pdict, subject, max.mismatch=0, min.mismatch=0, with.indels=FALSE, fixed=TRUE, algorithm="auto", verbose=FALSE) +matchPDict(pdict, subject, + max.mismatch=0, min.mismatch=0, with.indels=FALSE, fixed=TRUE, + algorithm="auto", verbose=FALSE) } \arguments{ @@ -111,7 +115,7 @@ vwhichPDict(pdict, subject, \code{whichPDict}. An \link{XStringSet} object containing the subject sequences - for \code{vcountPDict} and \code{vwhichPDict}. + for \code{vmatchPDict}, \code{vcountPDict}, \and code{vwhichPDict}. If \code{pdict} is a \link{PDict} object (i.e. a preprocessed dictionary), then \code{subject} must be of base class \link{DNAString}. @@ -210,6 +214,10 @@ vwhichPDict(pdict, subject, (see above for the details). \code{vwhichPDict} returns a list of \code{N} integer vectors. + + \code{vmatchPDict} returns an \link{MIndexList}, which is essentially a list + of length \code{N} where each entry is an \link{MIndex} object (as would be + obtained from running \code{matchPDict} on each of the sequences). } \author{H. Pagès} @@ -513,7 +521,7 @@ if (interactive()) { pdict2 <- PDict(probes2) ## Get the mapping from probes1 to probes2 (based on exact matching): - map1to2 <- vwhichPDict(pdict2, probes1) + map1to2 <- vwhichPDict(pdict2, probes1) ## The following helper function uses the probe level mapping to induce ## the mapping at the probe set IDs level (from hgu95av2 to hgu133a). From 7a89cf38ab877ee3019070f56d55e217aa2f5f8c Mon Sep 17 00:00:00 2001 From: ahl27 <30053966+ahl27@users.noreply.github.com> Date: Tue, 3 Feb 2026 16:31:39 -0500 Subject: [PATCH 6/9] add matchPDict tests --- R/matchPDict.R | 15 +++++++++------ tests/testthat/test-MIndexListClass.R | 0 2 files changed, 9 insertions(+), 6 deletions(-) create mode 100644 tests/testthat/test-MIndexListClass.R diff --git a/R/matchPDict.R b/R/matchPDict.R index dcf5865b..e6321972 100644 --- a/R/matchPDict.R +++ b/R/matchPDict.R @@ -598,13 +598,16 @@ } return(ans) } + if(matches.as == "MATCHES_AS_ENDS") { + ## vmatchPDict() by ends + ## converting all the results to MIndex objects and then wrapping the result + for(i in seq_along(ans)) + ans[[i]] <- new("ByPos_MIndex", width0=width(pdict), NAMES=names(pdict), ends=ans[[i]]) + names(ans) <- names(subject) + ans <- as(ans, "MIndexList") + return(ans) + } - ## vmatchPDict() - ## converting all the results to MIndex objects and then wrapping the result - for(i in seq_along(ans)) - ans[[i]] <- new("ByPos_MIndex", width0=width(pdict), NAMES=names(pdict), ends=ans[[i]]) - names(ans) <- names(subject) - ans <- as(ans, "MIndexList") return(ans) } diff --git a/tests/testthat/test-MIndexListClass.R b/tests/testthat/test-MIndexListClass.R new file mode 100644 index 00000000..e69de29b From 17347cad2f98e4051fea83bf7427bb0704bbec9e Mon Sep 17 00:00:00 2001 From: ahl27 <30053966+ahl27@users.noreply.github.com> Date: Tue, 3 Feb 2026 17:07:22 -0500 Subject: [PATCH 7/9] Fix behavior of vmatchPDict when matches.as=MATCHES_AS_RANGES --- R/matchPDict.R | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/R/matchPDict.R b/R/matchPDict.R index e6321972..1f1a9381 100644 --- a/R/matchPDict.R +++ b/R/matchPDict.R @@ -601,13 +601,48 @@ if(matches.as == "MATCHES_AS_ENDS") { ## vmatchPDict() by ends ## converting all the results to MIndex objects and then wrapping the result - for(i in seq_along(ans)) - ans[[i]] <- new("ByPos_MIndex", width0=width(pdict), NAMES=names(pdict), ends=ans[[i]]) + for(i in seq_along(ans)) { + ans[[i]] <- new("ByPos_MIndex", + width0=width(pdict), + NAMES=names(pdict), + ends=ans[[i]]) + } + names(ans) <- names(subject) ans <- as(ans, "MIndexList") return(ans) } + if(matches.as == "MATCHES_AS_RANGES") { + ## vmatchPDict() by ranges + ## This is the same as MATCHES_AS_ENDS, except ans[[i]] holds two lists + ## - The first list has the start position for each range + ## - The second list has the width of the range (including the start) + + for(i in seq_along(ans)){ + ## First we have to move from starts to ends to align with the + ## Pos_MIndex constructor + ans_start <- ans[[i]][[1]] + ans_width <- ans[[i]][[2]] + for(j in seq_along(ans_start)){ + if(!is.null(ans_start[[j]])){ + ## Note that the width includes the start, so we subtract 1 + ans_start[[j]] <- ans_start[[j]] + ans_width[[j]] - 1L + } + } + ## Now ans_start holds the **end** position + ## width(pdict) will be equivalent to ifelse(is.null(w), 0, w[1]) for + ## w in ans_width + ans[[i]] <- new("ByPos_MIndex", + width0=width(pdict), + NAMES=names(pdict), + ends=ans_start) + } + names(ans) <- names(subject) + ans <- as(ans, "MIndexList") + return(ans) + } + warning("Failed to post-process output of .vmatch(pdict, subject)") return(ans) } From 866da5164496d9611fa7dca048aa34eb8579abcf Mon Sep 17 00:00:00 2001 From: ahl27 <30053966+ahl27@users.noreply.github.com> Date: Tue, 3 Feb 2026 17:14:03 -0500 Subject: [PATCH 8/9] update man page and tests --- man/matchPDict-exact.Rd | 2 +- tests/testthat/test-matchPDict.R | 30 ++++++++++++++++++++++++------ 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/man/matchPDict-exact.Rd b/man/matchPDict-exact.Rd index 482dd409..035128d6 100644 --- a/man/matchPDict-exact.Rd +++ b/man/matchPDict-exact.Rd @@ -87,7 +87,7 @@ vcountPDict(pdict, subject, vwhichPDict(pdict, subject, max.mismatch=0, min.mismatch=0, with.indels=FALSE, fixed=TRUE, algorithm="auto", verbose=FALSE) -matchPDict(pdict, subject, +vmatchPDict(pdict, subject, max.mismatch=0, min.mismatch=0, with.indels=FALSE, fixed=TRUE, algorithm="auto", verbose=FALSE) } diff --git a/tests/testthat/test-matchPDict.R b/tests/testthat/test-matchPDict.R index 43f5925a..e6134467 100644 --- a/tests/testthat/test-matchPDict.R +++ b/tests/testthat/test-matchPDict.R @@ -256,11 +256,29 @@ test_that("vmatchPDict is equivalent to matchPDict output", { return(TRUE) } + ## Basic matching vmatch_output <- vmatchPDict(pdict, dss) - - match1_output <- matchPDict(pdict, dss[[1]]) - match2_output <- matchPDict(pdict, dss[[2]]) - - expect_true(mindex_obj_comp(vmatch_output[[1]], match1_output)) - expect_true(mindex_obj_comp(vmatch_output[[2]], match2_output)) + expect_true(mindex_obj_comp(vmatch_output[[1]], matchPDict(pdict, dss[[1]]))) + expect_true(mindex_obj_comp(vmatch_output[[2]], matchPDict(pdict, dss[[2]]))) + + ## max.mismatch + vmatch_output <- vmatchPDict(pdict, dss, max.mismatch = 2) + expect_true(mindex_obj_comp(vmatch_output[[1]], + matchPDict(pdict, dss[[1]], max.mismatch = 2))) + expect_true(mindex_obj_comp(vmatch_output[[2]], + matchPDict(pdict, dss[[2]], max.mismatch = 2))) + + ## min.mismatch + vmatch_output <- vmatchPDict(pdict, dss, min.mismatch = 1, max.mismatch = 2) + expect_true(mindex_obj_comp(vmatch_output[[1]], + matchPDict(pdict, dss[[1]], min.mismatch = 1, max.mismatch = 2))) + expect_true(mindex_obj_comp(vmatch_output[[2]], + matchPDict(pdict, dss[[2]], min.mismatch = 1, max.mismatch = 2))) + + ## fixed + vmatch_output <- vmatchPDict(pdict, dss, fixed = FALSE) + expect_true(mindex_obj_comp(vmatch_output[[1]], + matchPDict(pdict, dss[[1]], fixed = FALSE))) + expect_true(mindex_obj_comp(vmatch_output[[2]], + matchPDict(pdict, dss[[2]], fixed = FALSE))) }) From f5ac147eb1239ca03c0054ea9438e7df2d72af44 Mon Sep 17 00:00:00 2001 From: ahl27 <30053966+ahl27@users.noreply.github.com> Date: Tue, 3 Feb 2026 17:21:19 -0500 Subject: [PATCH 9/9] Improve warning message for vmatchPDict on two XStringSets --- src/match_pdict.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/match_pdict.c b/src/match_pdict.c index 742c0435..cade8820 100644 --- a/src/match_pdict.c +++ b/src/match_pdict.c @@ -572,7 +572,8 @@ SEXP vmatch_XStringSet_XStringSet(SEXP pattern, with_indels, fixed, algorithm, collapse, weight); } - error("vmatchPDict() is not supported yet, sorry"); + error("vmatchPDict() on two XStringSets is not supported yet. " + "Please use a PDict object instead (see `?PDict` for more info)."); return R_NilValue; }