Fix doc string errors.

newmana · newmana · commit dcf114454121 · 2025-10-21T09:06:59.000+10:00
diff --git a/docs/release_notes/1.1.5.rst b/docs/release_notes/1.1.5.rst
@@ -2,5 +2,6 @@
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 
 .. rubric:: Features
+
 * Add Leiden clustering wrapper.
-* Fix documentation, refactor code in spatial.SME.
+* Fix documentation, refactor code in spatial.SME.
diff --git a/docs/release_notes/1.2.2.rst b/docs/release_notes/1.2.2.rst
@@ -2,9 +2,11 @@
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 
 .. rubric:: Features
+
 * Added support for Python 3.11 and 3.12.
 * Upgraded scanpy to 1.11 - clustering will be different.
 * Added more CCI tests.
 
 .. rubric:: Bugs
-* Fixed copy-paste error in louvain.py file.
+
+* Fixed copy-paste error in louvain.py file.
diff --git a/stlearn/embedding/ica.py b/stlearn/embedding/ica.py
@@ -26,21 +26,22 @@ def run_ica(
         or 'cube'.
         You can also provide your own function. It should return a tuple
         containing the value of the function, and of its derivative, in the
-        point. Example:
-        def my_g(x):
-            return x ** 3, (3 * x ** 2).mean(axis=-1)
+        point. Example::
+
+           def my_g(x):
+               return x ** 3, (3 * x ** 2).mean(axis=-1)
     tol
         Tolerance on update at each iteration.
     use_data
         if None, then using all the gene expression profile. Else, use
         the chosen data from adata.obsm.
     copy
         Return a copy instead of writing to adata.
+
     Returns
     -------
     Depending on `copy`, returns or updates `adata` with the following fields.
     `X_ica` : :class:`numpy.ndarray` (`adata.obsm`)
-        Independent Component Analysis representation of data.
     """
 
     adata = adata.copy() if copy else adata
diff --git a/stlearn/embedding/pca.py b/stlearn/embedding/pca.py
@@ -20,9 +20,11 @@ def run_pca(
 ) -> AnnData | None:
     """\
     Wrap function scanpy.pp.pca
+
     Principal component analysis [Pedregosa11]_.
     Computes PCA coordinates, loadings and variance decomposition.
     Uses the implementation of *scikit-learn* [Pedregosa11]_.
+
     Parameters
     ----------
     data
@@ -38,12 +40,12 @@ def run_pca(
         Passing `None` decides automatically based on sparseness of the data.
     svd_solver
         SVD solver to use:
-        `'arpack'` (the default - deterministic)
-          for the ARPACK wrapper in SciPy (:func:`~scipy.sparse.linalg.svds`)
-        `'randomized'`
-          for the randomized algorithm due to Halko (2009).
-        `'auto'`
-          chooses automatically depending on the size of the problem.
+
+        - `'arpack'` (the default - deterministic) for the ARPACK wrapper in
+          SciPy (:func:`~scipy.sparse.linalg.svds`)
+        - `'randomized'` for the randomized algorithm due to Halko (2009).
+        - `'auto'` chooses automatically depending on the size of the problem.
+
     random_state
         Change to use different initial states for the optimization.
     return_info
@@ -52,7 +54,7 @@ def run_pca(
     use_highly_variable
         Whether to use highly variable genes only, stored in
         `.var['highly_variable']`.
-        By default uses them if they have been determined beforehand.
+        By default, uses them if they have been determined beforehand.
     dtype
         Numpy data type string to which to convert the result.
     copy
@@ -65,22 +67,21 @@ def run_pca(
     chunk_size
         Number of observations to include in each chunk.
         Required if `chunked=True` was passed.
+
     Returns
     -------
-    X_pca : :class:`~scipy.sparse.spmatrix`, :class:`~numpy.ndarray`
+    X_pca: :class:`~scipy.sparse.spmatrix`, :class:`~numpy.ndarray`
         If `data` is array-like and `return_info=False` was passed,
-        this function only returns `X_pca`…
-    adata : anndata.AnnData
-        …otherwise if `copy=True` it returns or else adds fields to `adata`:
-        `.obsm['X_pca']`
-             PCA representation of data.
-        `.varm['PCs']`
-             The principal components containing the loadings.
-        `.uns['pca']['variance_ratio']`
-             Ratio of explained variance.
-        `.uns['pca']['variance']`
-             Explained variance, equivalent to the eigenvalues of the
-             covariance matrix.
+        this function only returns `X_pca`.
+    adata: anndata.AnnData
+        Otherwise if `copy=True` it returns or else adds fields to `adata`:
+
+        - `.obsm['X_pca']` - PCA representation of data.
+        - `.varm['PCs']` - The principal components containing the loadings.
+        - `.uns['pca']['variance_ratio']` - Ratio of explained variance.
+        - `.uns['pca']['variance']` - Explained variance, equivalent to the
+          eigenvalues of the covariance matrix.
+
     """
 
     adata = scanpy.pp.pca(
diff --git a/stlearn/embedding/umap.py b/stlearn/embedding/umap.py
@@ -26,7 +26,9 @@ def run_umap(
 ) -> AnnData | None:
     """\
     Wrap function scanpy.pp.umap
+
     Embed the neighborhood graph using UMAP [McInnes18]_.
+
     UMAP (Uniform Manifold Approximation and Projection) is a manifold learning
     technique suitable for visualizing high-dimensional data. Besides tending to
     be faster than tSNE, it optimizes the embedding such that it best reflects
@@ -37,6 +39,7 @@ def run_umap(
     implementation of `umap-learn <https://github.com/lmcinnes/umap>`__
     [McInnes18]_. For a few comparisons of UMAP with tSNE, see this `preprint
     <https://doi.org/10.1101/298430>`__.
+
     Parameters
     ----------
     adata
@@ -48,6 +51,7 @@ def run_umap(
         If `RandomState`, `random_state` is the random number generator;
         If `None`, the random number generator is the `RandomState` instance used
         by `np.random`.
+
     Returns
     -------
     Depending on `copy`, returns or updates `adata` with the following fields.
diff --git a/stlearn/preprocessing/filter_genes.py b/stlearn/preprocessing/filter_genes.py
@@ -15,11 +15,14 @@ def filter_genes(
     Wrap function scanpy.pp.filter_genes
 
     Filter genes based on number of cells or counts.
+
     Keep genes that have at least `min_counts` counts or are expressed in at
     least `min_cells` cells or have at most `max_counts` counts or are expressed
     in at most `max_cells` cells.
+
     Only provide one of the optional parameters `min_counts`, `min_cells`,
     `max_counts`, `max_cells` per call.
+
     Parameters
     ----------
     adata
@@ -35,6 +38,7 @@ def filter_genes(
         Maximum number of cells expressed required for a gene to pass filtering.
     inplace
         Perform computation inplace or return result.
+
     Returns
     -------
     Depending on `inplace`, returns the following arrays or directly subsets
diff --git a/stlearn/preprocessing/graph.py b/stlearn/preprocessing/graph.py
@@ -54,11 +54,12 @@ def neighbors(
     the connectivity of the manifold (`method=='umap'`). If `method=='gauss'`,
     connectivities are computed according to [Coifman05]_, in the adaption of
     [Haghverdi16]_.
+
     Parameters
     ----------
-    adata
+    adata:
         Annotated data matrix.
-    n_neighbors
+    n_neighbors:
         The size of local neighborhood (in terms of number of neighboring data
         points) used for manifold approximation. Larger values result in more
         global views of the manifold, while smaller values result in more local
@@ -68,22 +69,23 @@ def neighbors(
         `n_neighbors` neighbor.
     {n_pcs}
     {use_rep}
-    knn
+    knn:
         If `True`, use a hard threshold to restrict the number of neighbors to
         `n_neighbors`, that is, consider a knn graph. Otherwise, use a Gaussian
         Kernel to assign low weights to neighbors more distant than the
         `n_neighbors` nearest neighbor.
-    random_state
+    random_state:
         A numpy random seed.
-    method
+    method:
         Use 'umap' [McInnes18]_ or 'gauss' (Gauss kernel following [Coifman05]_
         with adaptive width [Haghverdi16]_) for computing connectivities.
-    metric
+    metric:
         A known metric’s name or a callable that returns a distance.
-    metric_kwds
+    metric_kwds:
         Options for the metric.
-    copy
+    copy:
         Return a copy instead of writing to adata.
+
     Returns
     -------
     Depending on `copy`, updates or returns `adata` with the following:
diff --git a/stlearn/preprocessing/log_scale.py b/stlearn/preprocessing/log_scale.py
@@ -18,21 +18,23 @@ def log1p(
     Logarithmize the data matrix.
     Computes :math:`X = \\log(X + 1)`,
     where :math:`log` denotes the natural logarithm unless a different base is given.
+
     Parameters
     ----------
-    data
+    data:
         The (annotated) data matrix of shape `n_obs` × `n_vars`.
         Rows correspond to cells and columns to genes.
-    copy
+    copy:
         If an :class:`~anndata.AnnData` is passed, determines whether a copy
         is returned.
-    chunked
+    chunked:
         Process the data matrix in chunks, which will save memory.
         Applies only to :class:`~anndata.AnnData`.
-    chunk_size
+    chunk_size:
         `n_obs` of the chunks to process the data in.
-    base
+    base:
         Base of the logarithm. Natural logarithm is used by default.
+
     Returns
     -------
     Returns or updates `data`, depending on `copy`.
@@ -55,10 +57,12 @@ def scale(
     Wrap function of scanpy.pp.scale
 
     Scale data to unit variance and zero mean.
+
     .. note::
-        Variables (genes) that do not display any variation (are constant across
-        all observations) are retained and set to 0 during this operation. In
-        the future, they might be set to NaNs.
+       Variables (genes) that do not display any variation (are constant across
+       all observations) are retained and set to 0 during this operation. In
+       the future, they might be set to NaNs.
+
     Parameters
     ----------
     data:
@@ -72,6 +76,7 @@ def scale(
     copy
         If an :class:`~anndata.AnnData` is passed,
         determines whether a copy is returned.
+
     Returns
     -------
     Depending on `copy` returns or updates `data` with a scaled `data.X`.
diff --git a/stlearn/preprocessing/normalize.py b/stlearn/preprocessing/normalize.py
@@ -13,21 +13,22 @@ def normalize_total(
     inplace: bool = True,
 ) -> dict[str, np.ndarray] | None:
     """\
-    Wrap function from scanpy.pp.log1p
-    Normalize counts per cell.
+    Wrap function from scanpy.pp.normalize_total - normalize counts per cell.
+
     If choosing `target_sum=1e6`, this is CPM normalization.
     If `exclude_highly_expressed=True`, very highly expressed genes are excluded
     from the computation of the normalization factor (size factor) for each
     cell. This is meaningful as these can strongly influence the resulting
     normalized values for all other genes [Weinreb17]_.
     Similar functions are used, for example, by Seurat [Satija15]_, Cell Ranger
-    [Zheng17]_ or SPRING [Weinreb17]_.
-    Params
-    ------
-    adata
+    [Zheng17]_ or SPRING [Weinreb17]_.
+
+    Parameters
+    ----------
+    adata:
         The annotated data matrix of shape `n_obs` × `n_vars`.
         Rows correspond to cells and columns to genes.
-    target_sum
+    target_sum:
         If `None`, after normalization, each observation (cell) has a total
         count equal to the median of total counts for observations (cells)
         before normalization.
@@ -37,16 +38,17 @@ def normalize_total(
         highly expressed, if it has more than `max_fraction` of the total counts
         in at least one cell. The not-excluded genes will sum up to
         `target_sum`.
-    max_fraction
+    max_fraction:
         If `exclude_highly_expressed=True`, consider cells as highly expressed
         that have more counts than `max_fraction` of the original total counts
         in at least one cell.
-    key_added
+    key_added:
         Name of the field in `adata.obs` where the normalization factor is
         stored.
-    inplace
+    inplace:
         Whether to update `adata` or return dictionary with normalized copies of
         `adata.X` and `adata.layers`.
+
     Returns
     -------
     Returns dictionary with normalized copies of `adata.X` and `adata.layers`
diff --git a/stlearn/tl/clustering/leiden.py b/stlearn/tl/clustering/leiden.py
@@ -22,9 +22,11 @@ def leiden(
 ) -> AnnData | None:
     """\
     Wrap function scanpy.tl.leiden
+
     This requires having ran :func:`~scanpy.pp.neighbors` or
     :func:`~scanpy.external.pp.bbknn` first,
     or explicitly passing a ``adjacency`` matrix.
+
     Parameters
     ----------
     adata:
@@ -58,14 +60,16 @@ def leiden(
         `obsp` and `neighbors_key` at the same time.
     copy:
         Copy adata or modify it inplace.
+
     Returns
     -------
-    :obj:`None`
+    None or AnnData
         By default (``copy=False``), updates ``adata`` with the following fields:
-        ``adata.obs['leiden' | key_added]`` (:class:`pandas.Series`, dtype ``category``)
-            Array of dim (number of samples) that stores the subgroup id
-            (``'0'``, ``'1'``, ...) for each cell.
-    :class:`~anndata.AnnData`
+
+        - ``adata.obs['leiden']`` (:class:`pandas.Series`, dtype ``category``) -
+          Array of dim (number of samples) that stores the subgroup id
+          (``'0'``, ``'1'``, ...) for each cell.
+
         When ``copy=True`` is set, a copy of ``adata`` with those fields is returned.
     """
 
diff --git a/stlearn/tl/clustering/louvain.py b/stlearn/tl/clustering/louvain.py