diff --git a/r/NAMESPACE b/r/NAMESPACE
index 982ca440..e9998384 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -65,6 +65,8 @@ export(min_by_row)
 export(min_scalar)
 export(multiply_cols)
 export(multiply_rows)
+export(normalize_log)
+export(normalize_tfidf)
 export(nucleosome_counts)
 export(open_fragments_10x)
 export(open_fragments_dir)
@@ -108,6 +110,8 @@ export(rowVars.default)
 export(sctransform_pearson)
 export(select_cells)
 export(select_chromosomes)
+export(select_features_mean)
+export(select_features_variance)
 export(select_regions)
 export(set_trackplot_height)
 export(set_trackplot_label)
diff --git a/r/NEWS.md b/r/NEWS.md
index 6cab91a5..dc220204 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -10,6 +10,7 @@ Contributions welcome :)
 
 ## Features
 - Add `write_matrix_anndata_hdf5_dense()` which allows writing matrices in AnnData's dense format, most commonly used for `obsm` or `varm` matrices. (Thanks to @ycli1995 for pull request #166)
+- Add normalization helper functions `normalize_log()` and `normalize_tfidf()` (pull request #168)
 
 ## Bug-fixes
 - Fix error message printing when MACS crashes during `call_peaks_macs()` (pull request #175) 
@@ -39,6 +40,8 @@ of the new features this release and will continue to help with maintenance and
 - Add `rowQuantiles()` and `colQuantiles()` functions, which return the quantiles of each row/column of a matrix. Currently `rowQuantiles()` only works on row-major matrices and `colQuantiles()` only works on col-major matrices.
   If `matrixStats` or `MatrixGenerics` packages are installed, `BPCells::colQuantiles()` will fall back to their implementations for non-BPCells objects. (pull request #128)
 - Add `pseudobulk_matrix()` which allows pseudobulk aggregation by `sum` or `mean` and calculation of per-pseudobulk `variance` and `nonzero` statistics for each gene (pull request #128)
+- Add functions `normalize_tfidf()` and `normalize_log()`, which allow for easy normalization of iterable matrices using TF-IDF or log1p(pull request #168)
+- Add feature selection functions `select_features_by_{variance,dispersion,mean}()`, with parameterization for normalization steps, and number of variable features (pull request #169)
 
 ## Improvements
 - `trackplot_loop()` now accepts discrete color scales
diff --git a/r/R/singlecell_utils.R b/r/R/singlecell_utils.R
index 7267e19b..37f61d98 100644
--- a/r/R/singlecell_utils.R
+++ b/r/R/singlecell_utils.R
@@ -6,6 +6,146 @@
 # option. This file may not be copied, modified, or distributed
 # except according to those terms.
 
+
+#################
+# Feature selection
+#################
+
+#' Feature selection functions
+#' 
+#' Apply a feature selection method to a `(features x cells)` matrix.
+#' @rdname feature_selection
+#' @param mat (IterableMatrix) dimensions features x cells
+#' @param num_feats (float) Number of features to return.  If the number given is between 0 and 1, treat as a proportion of 
+#' the number of rows, rounded down.  Otherwise, treat as an absolute number.
+#' If the number is higher than the number of features in the matrix, 
+#' all features will be returned.
+#' @param normalize (function) Normalize matrix using a given function. If `NULL`, no normalization is performed.
+#' @param threads (integer) Number of threads to use.
+#' @returns
+#' Return a dataframe with the following columns, sorted descending by score:
+#' - `names`: Feature name.
+#' - `score`: Scoring of the feature, depending on the method used.  
+#' - `highly_variable`: Logical vector of whether the feature is highly variable.
+#'
+#' Each different feature selection method will have a different scoring method:
+#' - `select_features_by_variance`: Score representing variance of each feature.
+#' @details 
+#' `select_features_by_variance` Calculates the variance of each feature using the following process:
+#'  1. Perform an optional term frequency + log normalization, for each feature.
+#'  2. Find `num_feats` features with the highest variance.
+#' @export
+select_features_variance <- function(
+  mat, num_feats = 0.05, 
+  normalize = NULL,
+  threads = 1L
+) {
+  assert_greater_than_zero(num_feats)
+  assert_is_wholenumber(num_feats)
+  assert_len(num_feats, 1)
+  assert_is(num_feats, "numeric")
+  if (rlang::is_missing(mat)) {
+    return(create_partial(
+      missing_args = list(
+        num_feats = missing(num_feats),
+        normalize = missing(normalize), 
+        threads = missing(threads)
+      )
+    ))
+  }
+  assert_is(mat, "IterableMatrix")
+  num_feats <- min(max(num_feats, 0), nrow(mat))
+  if (num_feats < 1 && num_feats > 0) num_feats <- floor(nrow(mat) * num_feats)
+  if (!is.null(normalize)) mat <- partial_apply(normalize, threads = threads)(mat)
+  features_df <- tibble::tibble(
+    names = rownames(mat),
+    score = matrix_stats(mat, row_stats = "variance", threads = threads)$row_stats["variance",]
+  ) %>% 
+    dplyr::arrange(desc(score)) %>% 
+    dplyr::mutate(highly_variable = dplyr::row_number() <= num_feats)
+  return(features_df)
+}
+
+
+#' @rdname feature_selection
+#' @returns
+#'  - `select_features_by_dispersion`: Score representing the dispersion of each feature.
+#' @details 
+#' `select_features_by_dispersion` calculates the dispersion of each feature using the following process:
+#'  1. Perform an optional term frequency + log normalization, for each feature.
+#'  2. Find the dispersion (variance/mean) of each feature.
+#'  3. Find `num_feats` features with the highest dispersion.
+select_features_dispersion <- function(
+  mat, num_feats = 0.05, 
+  normalize = NULL,
+  threads = 1L
+) {
+  assert_greater_than_zero(num_feats)
+  assert_is_wholenumber(num_feats)
+  assert_len(num_feats, 1)
+  assert_is(num_feats, "numeric")
+  if (rlang::is_missing(mat)) {
+    return(create_partial(
+      missing_args = list(
+        num_feats = missing(num_feats),
+        normalize = missing(normalize), 
+        threads = missing(threads)
+      )
+    ))
+  }
+  num_feats <- min(max(num_feats, 0), nrow(mat))
+  if (num_feats < 1 && num_feats > 0) num_feats <- floor(nrow(mat) * num_feats)
+  assert_is(mat, "IterableMatrix")
+  if (!is.null(normalize)) mat <- partial_apply(normalize, threads = threads)(mat)
+  mat_stats <- matrix_stats(mat, row_stats = "variance", threads = threads)
+  features_df <- tibble::tibble(
+    names = rownames(mat),
+    score = mat_stats$row_stats["variance", ] / mat_stats$row_stats["mean", ]
+  ) %>% 
+    dplyr::arrange(desc(score)) %>% 
+    dplyr::mutate(highly_variable = dplyr::row_number() <= num_feats)
+  return(features_df)
+}
+
+
+#' @rdname feature_selection
+#' @returns
+#' - `select_features_by_mean`: Score representing the mean accessibility of each feature.
+#' @details 
+#' `select_features_by_mean` calculates the mean accessibility of each feature using the following process:
+#' 1. Get the sum of each binarized feature.
+#' 2. Find `num_feats` features with the highest accessibility.
+#' @export
+select_features_mean <- function(mat, num_feats = 0.05, normalize = NULL, threads = 1L) {
+  assert_is_wholenumber(num_feats)
+  assert_greater_than_zero(num_feats)
+  assert_is(num_feats, "numeric")
+  if (rlang::is_missing(mat)) {
+    return(create_partial(
+      missing_args = list(
+        num_feats = missing(num_feats),
+        normalize = missing(normalize), 
+        threads = missing(threads)
+      )
+    ))
+  }
+  assert_is(mat, "IterableMatrix")
+  num_feats <- min(max(num_feats, 0), nrow(mat))
+  if (num_feats < 1 && num_feats > 0) num_feats <- floor(nrow(mat) * num_feats)
+  if (!is.null(normalize)) mat <- partial_apply(normalize, threads = threads)(mat)
+  # get the sum of each feature, binarized
+  # get the top features
+  
+  features_df <- tibble::tibble(
+    names = rownames(mat),
+    score = matrix_stats(mat, row_stats = "nonzero", threads = threads)$row_stats["nonzero", ]
+  ) %>%
+    dplyr::arrange(desc(score)) %>%
+    dplyr::mutate(highly_variable = dplyr::row_number() <= num_feats)
+  return(features_df)
+}
+
+
 #' Test for marker features
 #'
 #' Given a features x cells matrix, perform one-vs-all differential
diff --git a/r/R/transforms.R b/r/R/transforms.R
index 2a5de994..3ce8ac56 100644
--- a/r/R/transforms.R
+++ b/r/R/transforms.R
@@ -923,3 +923,89 @@ regress_out <- function(mat, latent_data, prediction_axis = c("row", "col")) {
     vars_to_regress = vars_to_regress
   )
 }
+
+#################
+# Normalizations
+#################
+
+#' Normalization helper functions
+#' 
+#' Apply standard normalizations to a `(features x cells)` counts matrix.
+#' 
+#' @rdname normalize
+#' @param mat (IterableMatrix) Counts matrix to normalize. `(features x cells)`
+#' @param scale_factor (numeric) Scale factor to multiply matrix by for log normalization.
+#' @param threads (integer) Number of threads to use.
+#' @returns For each element \eqn{x_{ij}} in matrix \eqn{X} with \eqn{i} features and \eqn{j} cells, 
+#'  transform to a normalized value \eqn{\tilde{x}_{ij}} calculated as:
+#'  
+#'   - `normalize_log`: \eqn{\tilde{x}_{ij} = \log(\frac{x_{ij} \cdot \text{scaleFactor}}{\text{colSum}_j} + 1)}
+#' @details - `normalize_log`: Corresponds to `Seurat::NormalizeLog`
+#' @export
+normalize_log <- function(mat, scale_factor = 1e4, threads = 1L) {
+  assert_is_numeric(scale_factor)
+  assert_greater_than_zero(scale_factor)
+  if (rlang::is_missing(mat)) {
+    return(
+      create_partial(
+        missing_args = list(
+          scale_factor = missing(scale_factor), 
+          threads = missing(threads)
+        )
+      )
+    ) 
+  }
+  assert_is(mat, "IterableMatrix")
+  read_depth <- matrix_stats(mat, col_stats = c("mean"), threads = threads)$col_stats["mean", ] * nrow(mat)
+  mat <- mat %>% multiply_cols(1 / read_depth)
+  return(log1p(mat * scale_factor))
+}
+
+
+#' @rdname normalize
+#' @param feature_means (numeric, optional) Pre-calculated means of the features to normalize by. If no names are provided, then 
+#' each numeric value is assumed to correspond to the feature mean for the corresponding row of the matrix.
+#' Else, map each feature name to its mean value.
+#' @returns - `normalize_tfidf`: \eqn{\tilde{x}_{ij} = \log(\frac{x_{ij} \cdot \text{scaleFactor}}{\text{rowMean}_i\cdot \text{colSum}_j} + 1)}
+#' @details - `normalize_tfidf`: This follows the formula from Stuart, Butler et al. 2019, used by default in `ArchR::addIterativeLSI()` and `Signac::RunTFIDF()`
+#' @export
+normalize_tfidf <- function(
+  mat, feature_means = NULL,
+  scale_factor = 1e4, threads = 1L
+) {
+  assert_is_wholenumber(threads)
+  if (rlang::is_missing(mat)) {
+    return(
+      create_partial(
+        missing_args = list(
+          feature_means = missing(feature_means), 
+          scale_factor = missing(scale_factor), 
+          threads = missing(threads)
+      )
+      )
+    )
+  }
+  assert_is(mat, "IterableMatrix")
+  # If feature means are passed in, only need to calculate term frequency
+  if (is.null(feature_means)) {
+    mat_stats <- matrix_stats(mat, row_stats = c("mean"), col_stats = c("mean"))
+    feature_means <- mat_stats$row_stats["mean", ]
+    read_depth <- mat_stats$col_stats["mean", ] * nrow(mat)
+  } else {
+    assert_is_numeric(feature_means)
+    if (!is.null(names(feature_means)) && !is.null(rownames(mat))) {
+      # Make sure every name in feature means exists in rownames(mat)
+      # In the case there is a length mismatch but the feature names all exist in feature_means
+      # will not error out
+      assert_true(all(rownames(mat) %in% names(feature_means)))
+      feature_means <- feature_means[rownames(mat)]
+    } else {
+      assert_len(feature_means, nrow(mat))
+    }
+    read_depth <- matrix_stats(mat, col_stats = c("mean"), threads = threads)$col_stats["mean", ] * nrow(mat)
+  }
+  tf <- mat %>% multiply_cols(1 / read_depth)
+  idf <- 1 / feature_means
+  tf_idf_mat <- tf %>% multiply_rows(idf)
+  return(log1p(tf_idf_mat * scale_factor))
+}
\ No newline at end of file
diff --git a/r/R/utils.R b/r/R/utils.R
index 4ea62d15..d7c65358 100644
--- a/r/R/utils.R
+++ b/r/R/utils.R
@@ -56,4 +56,69 @@ log_progress <- function(msg, add_timestamp = TRUE){
   } else {
     message(msg)
   }
+}
+
+#' Helper to create partial functions
+#'
+#' Automatically creates a partial application of the caller
+#' function including all non-missing arguments. 
+#'
+#' @param missing_args (named list[bool]) Any named index with a TRUE value
+#'   will be treated as missing. Designed to be used in the caller with the
+#'   `base::missing()` function to detect unspecified arguments with default values,
+#'   or to manually specifiy other arguments that should not be specialized
+#' @return A `bpcells_partial` object (a function with some extra attributes)
+#' @keywords internal
+create_partial <- function(missing_args=list()) {
+  env <- rlang::caller_env()
+  fn_sym <- rlang::caller_call()[[1]]
+  fn <- rlang::caller_fn()
+
+  args <- list()
+  for (n in names(formals(fn))) {
+    if (n %in% names(missing_args) && missing_args[[n]]) next
+    if (rlang::is_missing(env[[n]])) next
+    args[[n]] <- env[[n]]
+  }
+
+  ret <- do.call(partial_apply, c(fn, args))
+  attr(ret, "body")[[1]] <- fn_sym
+  return(ret)
+}
+
+
+#' Create partial function calls
+#' 
+#' Specify some but not all arguments to a function.
+#'
+#' @param f A function
+#' @param ... Named arguments to `f`
+#' @param .overwrite (bool) If `f` is already an output from
+#'   `partial_apply()`, whether parameter re-definitions should
+#'   be ignored or overwrite the existing definitions
+#' @return A `bpcells_partial` object (a function with some extra attributes)
+#' @keywords internal
+partial_apply <- function(f, ..., .overwrite=FALSE) {
+  args <- rlang::list2(...)
+
+  if (is(f, "bpcells_partial")) {
+    prev_args <- attr(f, "args")
+    for (a in names(prev_args)) {
+      if (!(.overwrite && a %in% names(args))) {
+        args[[a]] <- prev_args[[a]]
+      } 
+    }
+    f <- attr(f, "fn")
+    function_name <- attr(f, "body")[[1]]
+  } else {
+    function_name <- rlang::sym(rlang::caller_arg(f))
+  }
+  partial_fn <- do.call(purrr::partial, c(f, args))
+  attr(partial_fn, "body")[[1]] <- function_name
+  structure(
+    partial_fn,
+    class = c("bpcells_partial", "purrr_function_partial", "function"),
+    args = args,
+    fn = f
+  )
 }
\ No newline at end of file
diff --git a/r/man/create_partial.Rd b/r/man/create_partial.Rd
new file mode 100644
index 00000000..fe92fd5a
--- /dev/null
+++ b/r/man/create_partial.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{create_partial}
+\alias{create_partial}
+\title{Helper to create partial functions}
+\usage{
+create_partial(missing_args = list())
+}
+\arguments{
+\item{missing_args}{(named list\link{bool}) Any named index with a TRUE value
+will be treated as missing. Designed to be used in the caller with the
+\code{base::missing()} function to detect unspecified arguments with default values,
+or to manually specifiy other arguments that should not be specialized}
+}
+\value{
+A \code{bpcells_partial} object (a function with some extra attributes)
+}
+\description{
+Automatically creates a partial application of the caller
+function including all non-missing arguments.
+}
+\keyword{internal}
diff --git a/r/man/feature_selection.Rd b/r/man/feature_selection.Rd
new file mode 100644
index 00000000..f83e2199
--- /dev/null
+++ b/r/man/feature_selection.Rd
@@ -0,0 +1,75 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/singlecell_utils.R
+\name{select_features_variance}
+\alias{select_features_variance}
+\alias{select_features_dispersion}
+\alias{select_features_mean}
+\title{Feature selection functions}
+\usage{
+select_features_variance(mat, num_feats = 0.05, normalize = NULL, threads = 1L)
+
+select_features_dispersion(
+  mat,
+  num_feats = 0.05,
+  normalize = NULL,
+  threads = 1L
+)
+
+select_features_mean(mat, num_feats = 0.05, normalize = NULL, threads = 1L)
+}
+\arguments{
+\item{mat}{(IterableMatrix) dimensions features x cells}
+
+\item{num_feats}{(float) Number of features to return.  If the number given is between 0 and 1, treat as a proportion of
+the number of rows, rounded down.  Otherwise, treat as an absolute number.
+If the number is higher than the number of features in the matrix,
+all features will be returned.}
+
+\item{normalize}{(function) Normalize matrix using a given function. If \code{NULL}, no normalization is performed.}
+
+\item{threads}{(integer) Number of threads to use.}
+}
+\value{
+Return a dataframe with the following columns, sorted descending by score:
+\itemize{
+\item \code{names}: Feature name.
+\item \code{score}: Scoring of the feature, depending on the method used.
+\item \code{highly_variable}: Logical vector of whether the feature is highly variable.
+}
+
+Each different feature selection method will have a different scoring method:
+\itemize{
+\item \code{select_features_by_variance}: Score representing variance of each feature.
+}
+
+\itemize{
+\item \code{select_features_by_dispersion}: Score representing the dispersion of each feature.
+}
+
+\itemize{
+\item \code{select_features_by_mean}: Score representing the mean accessibility of each feature.
+}
+}
+\description{
+Apply a feature selection method to a \verb{(features x cells)} matrix.
+}
+\details{
+\code{select_features_by_variance} Calculates the variance of each feature using the following process:
+\enumerate{
+\item Perform an optional term frequency + log normalization, for each feature.
+\item Find \code{num_feats} features with the highest variance.
+}
+
+\code{select_features_by_dispersion} calculates the dispersion of each feature using the following process:
+\enumerate{
+\item Perform an optional term frequency + log normalization, for each feature.
+\item Find the dispersion (variance/mean) of each feature.
+\item Find \code{num_feats} features with the highest dispersion.
+}
+
+\code{select_features_by_mean} calculates the mean accessibility of each feature using the following process:
+\enumerate{
+\item Get the sum of each binarized feature.
+\item Find \code{num_feats} features with the highest accessibility.
+}
+}
diff --git a/r/man/normalize.Rd b/r/man/normalize.Rd
new file mode 100644
index 00000000..ac53f2c0
--- /dev/null
+++ b/r/man/normalize.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/transforms.R
+\name{normalize_log}
+\alias{normalize_log}
+\alias{normalize_tfidf}
+\title{Normalization helper functions}
+\usage{
+normalize_log(mat, scale_factor = 10000, threads = 1L)
+
+normalize_tfidf(mat, feature_means = NULL, scale_factor = 10000, threads = 1L)
+}
+\arguments{
+\item{mat}{(IterableMatrix) Counts matrix to normalize. \verb{(features x cells)}}
+
+\item{scale_factor}{(numeric) Scale factor to multiply matrix by for log normalization.}
+
+\item{threads}{(integer) Number of threads to use.}
+
+\item{feature_means}{(numeric, optional) Pre-calculated means of the features to normalize by. If no names are provided, then
+each numeric value is assumed to correspond to the feature mean for the corresponding row of the matrix.
+Else, map each feature name to its mean value.}
+}
+\value{
+For each element \eqn{x_{ij}} in matrix \eqn{X} with \eqn{i} features and \eqn{j} cells,
+transform to a normalized value \eqn{\tilde{x}_{ij}} calculated as:
+\itemize{
+\item \code{normalize_log}: \eqn{\tilde{x}_{ij} = \log(\frac{x_{ij} \cdot \text{scaleFactor}}{\text{colSum}_j} + 1)}
+}
+
+\itemize{
+\item \code{normalize_tfidf}: \eqn{\tilde{x}_{ij} = \log(\frac{x_{ij} \cdot \text{scaleFactor}}{\text{rowMean}_i\cdot \text{colSum}_j} + 1)}
+}
+}
+\description{
+Apply standard normalizations to a \verb{(features x cells)} counts matrix.
+}
+\details{
+\itemize{
+\item \code{normalize_log}: Corresponds to \code{Seurat::NormalizeLog}
+}
+
+\itemize{
+\item \code{normalize_tfidf}: This follows the formula from Stuart, Butler et al. 2019, used by default in \code{ArchR::addIterativeLSI()} and \code{Signac::RunTFIDF()}
+}
+}
diff --git a/r/man/partial_apply.Rd b/r/man/partial_apply.Rd
new file mode 100644
index 00000000..c1ea508d
--- /dev/null
+++ b/r/man/partial_apply.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{partial_apply}
+\alias{partial_apply}
+\title{Create partial function calls}
+\usage{
+partial_apply(f, ..., .overwrite = FALSE)
+}
+\arguments{
+\item{f}{A function}
+
+\item{...}{Named arguments to \code{f}}
+
+\item{.overwrite}{(bool) If \code{f} is already an output from
+\code{partial_apply()}, whether parameter re-definitions should
+be ignored or overwrite the existing definitions}
+}
+\value{
+A \code{bpcells_partial} object (a function with some extra attributes)
+}
+\description{
+Specify some but not all arguments to a function.
+}
+\keyword{internal}
diff --git a/r/pkgdown/_pkgdown.yml b/r/pkgdown/_pkgdown.yml
index 64d16f01..0d98b13d 100644
--- a/r/pkgdown/_pkgdown.yml
+++ b/r/pkgdown/_pkgdown.yml
@@ -9,6 +9,11 @@ template:
   includes:
     in_header: |
        <script defer data-domain="benparks.net" src="https://plausible.benparks.net/js/visit-counts.js"></script>
+       <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.15/dist/katex.min.css" integrity="sha384-Htz9HMhiwV8GuQ28Xr9pEs1B4qJiYu/nYLLwlDklR53QibDfmQzi7rYxXhMH/5/u" crossorigin="anonymous">
+       <!-- The loading of KaTeX is deferred to speed up page rendering -->
+       <script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.15/dist/katex.min.js" integrity="sha384-bxmi2jLGCvnsEqMuYLKE/KsVCxV3PqmKeK6Y6+lmNXBry6+luFkEOsmp5vD9I/7+" crossorigin="anonymous"></script>
+       <!-- To automatically render math in text elements, include the auto-render extension: -->
+       <script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.15/dist/contrib/auto-render.min.js" integrity="sha384-hCXGrW6PitJEwbkoStFjeJxv+fSOOQKOPbJxSfM6G5sWZjAyWhXiTIIAmQqnlLlh" crossorigin="anonymous" onload="renderMathInElement(document.body);"></script>
     after_body: |
        <img src="https://plausible.benparks.net/flask-plausible/bpcells-docs.png" style="position:absolute;" />
 
@@ -130,15 +135,12 @@ reference:
   - IterableMatrix-methods
   - pseudobulk_matrix
 
-- title: "Reference Annotations"
+- title: "Single-cell analysis helpers"
+- subtitle: "Dimensionality reduction"
 - contents:
-  - human_gene_mapping
-  - match_gene_symbol
-  - read_gtf
-  - read_bed
-  - read_ucsc_chrom_sizes
-
-- title: "Clustering"
+  - normalize_log
+  - select_features_variance
+- subtitle: "Clustering"
 - contents:
   - knn_hnsw
   - cluster_graph_leiden
@@ -176,3 +178,11 @@ reference:
   - discrete_palette
   - collect_features
   - rotate_x_labels
+
+- title: "Reference Annotations"
+- contents:
+  - human_gene_mapping
+  - match_gene_symbol
+  - read_gtf
+  - read_bed
+  - read_ucsc_chrom_sizes
diff --git a/r/tests/testthat/test-matrix_transforms.R b/r/tests/testthat/test-matrix_transforms.R
index 385605e0..24cd9c23 100644
--- a/r/tests/testthat/test-matrix_transforms.R
+++ b/r/tests/testthat/test-matrix_transforms.R
@@ -346,3 +346,48 @@ test_that("linear regression works", {
     expect_equal(as(m1, "matrix"), ans)
     expect_equal(as(m1t, "matrix"), ans)
 })
+
+test_that("tf-idf normalization works", {
+    m <- generate_sparse_matrix(5, 5)
+    rownames(m) <- paste0("row", seq_len(nrow(m)))
+    rev_rownames <- rev(rownames(m))
+    # Create tf-idf normalization for dgCMatrix
+    res_dgc <- log1p((diag(1/rowMeans(m)) %*% (m %*% diag(1/colSums(m))) %>% as("dgCMatrix")) * 1e4)
+    
+    rownames(res_dgc) <- rownames(m)
+    m2 <- as(m, "IterableMatrix")
+    # Check that we can pass in row means as a (named) vector
+    row_means <- matrix_stats(m2, row_stats = c("mean"))$row_stats["mean",]
+    # Test that row means ordering does not matter as long as names exist
+    row_means_shuffled <- row_means[sample(1:length(row_means))]
+    # Test that row means can have an extra element as long as all rownames are in the vector
+    row_means_plus_one <- c(row_means, row6 = 1)
+    
+
+    res <- normalize_tfidf(m2)
+    expect_equal(res %>% as("dgCMatrix"), res_dgc, tolerance = 1e-6)
+    res_with_row_means <- normalize_tfidf(m2, feature_means = row_means)
+    res_with_row_means_partial <- normalize_tfidf(feature_means = row_means)(m2)
+    expect_equal(res_with_row_means, res_with_row_means_partial)
+    expect_identical(res, res_with_row_means)
+
+    res_with_shuffled_row_means <- normalize_tfidf(m2, feature_means = row_means_shuffled)
+    expect_identical(res_with_row_means, res_with_shuffled_row_means, res)
+
+    res_with_row_means_with_extra_element <- normalize_tfidf(m2, feature_means = row_means_plus_one)
+    expect_identical(res, res_with_row_means_with_extra_element)
+})
+
+test_that("normalize_log works", {
+    m <- generate_sparse_matrix(5, 5)
+    res_dgc <- m %*% diag(1/colSums(m)) %>% as("dgCMatrix")
+    m2 <- as(m, "IterableMatrix")
+    # Test that default params yield the same as log1p on dgCMatrix
+    res_1 <- as(normalize_log(m2), "dgCMatrix")
+    expect_equal(res_1, log1p(res_dgc*1e4), tolerance = 1e-6)
+    
+    # Test that changing scale factor works
+    res_2 <- as(normalize_log(m2, scale_factor = 1e5), "dgCMatrix")
+    res_2_partial <- as(normalize_log(scale_factor = 1e5)(m2), "dgCMatrix")
+    expect_equal(res_2, log1p(res_dgc*1e5), tolerance = 1e-6)
+})
\ No newline at end of file
diff --git a/r/tests/testthat/test-singlecell_utils.R b/r/tests/testthat/test-singlecell_utils.R
index 85ad0be3..08e41b7c 100644
--- a/r/tests/testthat/test-singlecell_utils.R
+++ b/r/tests/testthat/test-singlecell_utils.R
@@ -13,6 +13,30 @@ generate_sparse_matrix <- function(nrow, ncol, fraction_nonzero = 0.5, max_val =
   as(m, "dgCMatrix")
 }
 
+test_that("select_features works general case", {
+  m1 <- generate_sparse_matrix(100, 50) %>% as("IterableMatrix")
+  for (fn in c("select_features_variance", "select_features_dispersion", "select_features_mean")) {
+    res <- do.call(fn, list(m1, num_feats = 5))
+    expect_equal(nrow(res), nrow(m1)) # Check that dataframe has correct features we're expecting
+    expect_equal(sum(res$highly_variable), 5) # Only 10 features marked as highly variable
+    expect_setequal(res$names, rownames(m1))
+    res_more_feats_than_rows <- do.call(fn, list(m1, num_feats = 10000)) # more features than rows
+    res_feats_equal_rows <- do.call(fn, list(m1, num_feats = 100))
+    res_feats_partial <- get(fn)(num_feats = 100)(m1)
+    expect_identical(res_feats_equal_rows, res_feats_partial)
+    expect_identical(res_more_feats_than_rows, res_feats_equal_rows)
+    if (fn == "select_features_variance") {
+      # Check that normalization actually does something
+      res_no_norm <- do.call(fn, list(m1, num_feats = 10, normalize = NULL))
+      # Check that we can do partial functions on normalization too
+      res_norm_partial <- do.call(fn, list(m1, num_feats = 10, normalize = normalize_log(scale = 1e3, threads = 1L)))
+      res_norm_implicit_partial <- select_features_variance(normalize = normalize_log(scale_factor = 1e3), num_feats = 10)(m1)
+      expect_identical(res_norm_partial, res_norm_implicit_partial)
+      expect_true(!all((res_no_norm %>% dplyr::arrange(names))$score == (res_norm_partial %>% dplyr::arrange(names))$score))
+    }
+  }
+})
+
 test_that("Wilcoxon rank sum works (small)", {
     x <- c(0.80, 0.83, 1.89, 1.04, 1.45, 1.38, 1.91, 1.64, 0.73, 1.46)
     y <- c(1.15, 0.88, 0.90, 0.74, 1.21)
@@ -161,4 +185,5 @@ test_that("Pseudobulk aggregation works with multiple return types", {
       }
     }
   }
-})
\ No newline at end of file
+})
+