Cleaned up docstring for proper API generation.

SingleR-inc · Dec 13, 2024 · c1bd77b · c1bd77b
1 parent 3365a3b
commit c1bd77b
Show file tree

Hide file tree

Showing 9 changed files with 66 additions and 87 deletions.
diff --git a/docs/conf.py b/docs/conf.py
@@ -43,7 +43,7 @@
 try:
     import sphinx
 
-    cmd_line = f"sphinx-apidoc --implicit-namespaces -f -o {output_dir} {module_dir}"
+    cmd_line = f"sphinx-apidoc -M --implicit-namespaces -f -o {output_dir} {module_dir} {module_dir}/lib_*"
 
     args = cmd_line.split(" ")
     if tuple(sphinx.__version__.split(".")) >= ("1", "7"):

diff --git a/docs/index.md b/docs/index.md
@@ -1,25 +1,9 @@
-# singler
-
-Add a short description here!
-
-
-## Note
-
-> This is the main page of your project's [Sphinx] documentation. It is
-> formatted in [Markdown]. Add additional pages by creating md-files in
-> `docs` or rst-files (formatted in [reStructuredText]) and adding links to
-> them in the `Contents` section below.
->
-> Please check [Sphinx] and [MyST] for more information
-> about how to document your project and how to configure your preferences.
-
-
 ## Contents
 
 ```{toctree}
 :maxdepth: 2
 
-Overview <readme>
+Usage <readme>
 Contributions & Help <contributing>
 License <license>
 Authors <authors>

diff --git a/src/singler/annotate_integrated.py b/src/singler/annotate_integrated.py
@@ -49,37 +49,21 @@ def annotate_integrated(
             - A ``SummarizedExperiment`` object containing such a matrix in its assays.
 
         ref_labels:
-            Sequence of the same length as ``ref_data``, where the contents
-            depend on the type of value in the corresponding entry of ``ref_data``:
-
-            - If ``ref_data[i]`` is a matrix-like object, ``ref_labels[i]`` should be
-              a sequence of length equal to the number of columns of ``ref_data[i]``,
-              containing the label associated with each column.
-            - If ``ref_data[i]`` is a ``SummarizedExperiment``, ``ref_labels[i]``
-              may be a string speciying the column name in `column_data` that contains the
-              features. It can also be set to ``None`` to use the row names of the
-              experiment as features.
+            Sequence of the same length as ``ref_data``. The ``i``-th entry
+            should be a sequence of length equal to the number of columns of
+            ``ref_data[i]``, containing the label associated with each column.
 
         test_features:
-            Sequence of length equal to the number of rows in
-            ``test_data``, containing the feature identifier for each row.
-
-            Alternatively, if ``test_data`` is a ``SummarizedExperiment``,
-            ``test_features`` may be a string speciying the column name in
-            `row_data` that contains the features. It can also be set to
-            ``None`` to use the row names of the experiment as features.
+            Sequence of length equal to the number of rows in ``test_data``,
+            containing the feature identifier for each row.  Alternatively
+            ``None``, to use the row names of the experiment as features.
 
         ref_features:
-            Sequence of the same length as ``ref_data``, where the contents
-            depend on the type of value in the corresponding entry of ``ref_data``:
-
-            - If ``ref_data[i]`` is a matrix-like object, ``ref_features[i]`` should be
-              a sequence of length equal to the number of rows of ``ref_data[i]``,
-              containing the feature identifier associated with each row.
-            - If ``ref_data[i]`` is a ``SummarizedExperiment``,
-              ``ref_features[i]`` may be a string speciying the column name in
-              `row_data` that contains the features. It can also be set to
-              ``None`` to use the row names of the experiment as features.
+            Sequence of the same length as ``ref_data``. The ``i``-th entry
+            should be a sequence of length equal to the number of rows of
+            ``ref_data[i]``, containing the feature identifier associated with
+            each row. An individual entry can also be ``None`` to use the row
+            names of the corresponding experiment as features.
 
             This can also be ``None`` to indicate that the row names should be
             used for all references, assuming ``ref_data`` only contains

diff --git a/src/singler/annotate_single.py b/src/singler/annotate_single.py
@@ -60,7 +60,7 @@ def annotate_single(
         ref_features:
             Sequence of length equal to the number of rows of ``ref_data``,
             containing the feature identifier for each row. Alternatively
-            ``None``, to use the `row_names` of the experiment as features.
+            ``None``, to use the row names of the experiment as features.
 
         test_assay_type:
             Assay containing the expression matrix, if ``test_data`` is a

diff --git a/src/singler/classify_integrated.py b/src/singler/classify_integrated.py
@@ -35,16 +35,16 @@ def classify_integrated(
 
         results:
             List of classification results generated by running
-            :py:meth:`~singler.classify_single.classify_single` on
+            :py:func:`~singler.classify_single.classify_single` on
             ``test_data`` with each reference.  References should be ordered as
             in ``integrated_prebuilt.reference_names``.
 
         integrated_prebuilt:
             Integrated reference object, constructed with
-            :py:meth:`~singler.train_integrated.train_integrated`.
+            :py:func:`~singler.train_integrated.train_integrated`.
 
         assay_type:
-            Assay containing the expression matrix, if `test_data` is a
+            Assay containing the expression matrix, if ``test_data`` is a
             :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`.
 
         quantile:
@@ -70,9 +70,9 @@ def classify_integrated(
         ``best_label`` across all references, defined as the assigned label in
         the best reference; the identity of the ``best_reference``, either as a
         name string or an integer index; the ``scores`` for the best label in
-        each reference, as a nested BiocFrame of BiocFrames; and the ``delta``
-        from the best to the second-best reference. Each row corresponds to a
-        column of ``test``.
+        each reference, as a nested ``BiocFrame``; and the ``delta`` from the
+        best to the second-best reference. Each row corresponds to a column of
+        ``test_data``.
     """
     if isinstance(test_data, summarizedexperiment.SummarizedExperiment):
         test_data = test_data.assay(assay_type)

diff --git a/src/singler/classify_single.py b/src/singler/classify_single.py
@@ -33,10 +33,10 @@ def classify_single(
 
         ref_prebuilt:
             A pre-built reference created with
-            :py:meth:`~singler.build_single_reference.build_single_reference`.
+            :py:func:`~singler.train_single.train_single`.
 
         assay_type:
-            Assay containing the expression matrix, if `test_data` is a
+            Assay containing the expression matrix, if ``test_data`` is a
             :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`.
 
         quantile:
@@ -57,7 +57,7 @@ def classify_single(
 
     Returns:
         A :py:class:`~BiocFrame.BiocFrame.BiocFrame` containing the ``best``
-        label, the ``scores`` for each label (as a nested BiocFrame), and the
+        label, the ``scores`` for each label as a nested ``BiocFrame``, and the
         ``delta`` from the best to the second-best label. Each row corresponds
         to a column of ``test_data``. The metadata contains ``markers``, a list of
         the markers from each pairwise comparison between labels; and ``used``,

diff --git a/src/singler/get_classic_markers.py b/src/singler/get_classic_markers.py
@@ -101,40 +101,51 @@ def get_classic_markers(
 
     Args:
         ref_data:
-            A matrix-like object containing the log-normalized expression values of a reference dataset.
-            Each column is a sample and each row is a feature.
+            A matrix-like object containing the log-normalized expression
+            values of a reference dataset.  Each column is a sample and each
+            row is a feature.
             
-            Alternatively, this can be a :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`
+            Alternatively, this can be a
+            :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`
             containing a matrix-like object in one of its assays.
             
-            Alternatively, a list of such matrices or ``SummarizedExperiment`` objects,
-            typically for multiple batches of the same reference;
-            it is assumed that different batches exhibit at least some overlap in their ``features`` and ``labels``.
+            Alternatively, a list of such matrices or ``SummarizedExperiment``
+            objects, typically for multiple batches of the same reference; it
+            is assumed that different batches exhibit at least some overlap in
+            their ``ref_features`` and ``ref_labels``.
 
         ref_labels:
-            A sequence of length equal to the number of columns of ``ref``,
+            If ``ref_data`` is not a list, ``ref_labels`` should be a sequence
+            of length equal to the number of columns of ``ref_data``,
             containing a label (usually a string) for each column.
-            Alternatively, a list of such sequences of length equal to that of a list ``ref``;
-            each sequence should have length equal to the number of columns of the corresponding entry of ``ref``.
+
+            If ``ref_data`` is a list, ``ref_labels`` should also be a list of
+            the same length. Each entry should be a sequence of length equal to
+            the number of columns of the corresponding entry of ``ref_data``.
 
         ref_features:
-            A sequence of length equal to the number of rows of ``ref``,
+            If ``ref_data`` is not a list, ``ref_features`` should be a
+            sequence of length equal to the number of rows of ``ref_data``,
             containing the feature name (usually a string) for each row.
-            Alternatively, a list of such sequences of length equal to that of a list ``ref``;
-            each sequence should have length equal to the number of rows of the corresponding entry of ``ref``.
+
+            If ``ref_data`` is a list, ``ref_features`` should also be a list
+            of the same length. Each entry should be a sequence of length
+            equal to the number of rows of the corresponding entry of ``ref_data``.
 
         assay_type:
-            Name or index of the assay containing the assay of interest,
-            if ``ref`` is or contains
-            :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment` objects.
+            Name or index of the assay of interest, if ``ref_data`` is or contains
+            :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`
+            objects.
 
         check_missing:
-            Whether to check for and remove rows with missing (NaN) values in the reference matrices.
-            This can be set to False if it is known that no NaN values exist.
+            Whether to check for and remove rows with missing (NaN) values in
+            the reference matrices. This can be set to False if it is known
+            that no NaN values exist.
 
         num_de:
-            Number of differentially expressed genes to use as markers for each pairwise comparison between labels.
-            If None, an appropriate number of genes is automatically determined.
+            Number of differentially expressed genes to use as markers for each
+            pairwise comparison between labels. If ``None``, an appropriate
+            number of genes is automatically determined.
 
         num_threads:
             Number of threads to use for the calculations.

diff --git a/src/singler/train_integrated.py b/src/singler/train_integrated.py
@@ -22,15 +22,15 @@ def __init__(self, ptr, ref_names, ref_labels):
     @property
     def reference_names(self) -> Union[Sequence[str], None]:
         """Sequence containing the names of the references. Alternatively
-        None, if no names were supplied."""
+        ``None``, if no names were supplied."""
         return self._names
 
     @property
     def reference_labels(self) -> list:
         """List of lists containing the names of the labels for each reference.
 
         Each entry corresponds to a reference in :py:attr:`~reference_names`,
-        if ``reference_names`` is not None.
+        if ``reference_names`` is not ``None``.
         """
         return self._labels
 
@@ -50,10 +50,10 @@ def train_integrated(
 
         ref_prebuilt:
             List of prebuilt references, typically created by calling
-            :py:meth:`~singler.build_single_reference.train_single`.
+            :py:func:`~singler.train_single.train_single`.
 
         ref_names:
-            Sequence of names for the references. If None, these are
+            Sequence of names for the references. If ``None``, these are
             automatically generated.
 
         warn_lost:

diff --git a/src/singler/train_single.py b/src/singler/train_single.py
@@ -13,8 +13,8 @@
 
 class TrainedSingleReference:
     """A prebuilt reference object, typically created by
-    :py:meth:`~singler.build_single_reference.build_single_reference`. This is intended for advanced users only and
-    should not be serialized.
+    :py:func:`~singler.train_single.train_single`. This is intended for
+    advanced users only and should not be serialized.
     """
 
     def __init__(
@@ -110,7 +110,7 @@ def train_single(
         ref_data:
             A matrix-like object where rows are features, columns are
             reference profiles, and each entry is the expression value.
-            If `markers` is not provided, expression should be normalized
+            If ``markers`` is not provided, expression should be normalized
             and log-transformed in preparation for marker prioritization via
             differential expression analyses. Otherwise, any expression values
             are acceptable as only the ranking within each column is used.
@@ -130,7 +130,7 @@ def train_single(
 
         assay_type:
             Assay containing the expression matrix,
-            if `ref_data` is a
+            if ``ref_data`` is a
             :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`.
 
         check_missing:
@@ -139,8 +139,8 @@ def train_single(
 
         restrict_to:
             Subset of available features to restrict to. Only features in
-            ``restrict_to`` will be used in the reference building. If None,
-            no restriction is performed.
+            ``restrict_to`` will be used in the reference building. If
+            ``None``, no restriction is performed.
 
         markers:
             Upregulated markers for each pairwise comparison between labels.
@@ -159,9 +159,9 @@ def train_single(
             Further arguments to pass to the chosen marker detection method.
             Only used if ``markers`` is not supplied.
 
-        approximate:
-            Whether to use an approximate neighbor search to compute scores
-            during classification.
+        nn_parameters:
+            Algorithm for constructing the neighbor search index, used to
+            compute scores during classification.
 
         num_threads:
             Number of threads to use for reference building.