diff --git a/docs/conf.py b/docs/conf.py index 3495622..5ec32e8 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -43,7 +43,7 @@ try: import sphinx - cmd_line = f"sphinx-apidoc --implicit-namespaces -f -o {output_dir} {module_dir}" + cmd_line = f"sphinx-apidoc -M --implicit-namespaces -f -o {output_dir} {module_dir} {module_dir}/lib_*" args = cmd_line.split(" ") if tuple(sphinx.__version__.split(".")) >= ("1", "7"): diff --git a/docs/index.md b/docs/index.md index 8362299..8d88c79 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,25 +1,9 @@ -# singler - -Add a short description here! - - -## Note - -> This is the main page of your project's [Sphinx] documentation. It is -> formatted in [Markdown]. Add additional pages by creating md-files in -> `docs` or rst-files (formatted in [reStructuredText]) and adding links to -> them in the `Contents` section below. -> -> Please check [Sphinx] and [MyST] for more information -> about how to document your project and how to configure your preferences. - - ## Contents ```{toctree} :maxdepth: 2 -Overview +Usage Contributions & Help License Authors diff --git a/src/singler/annotate_integrated.py b/src/singler/annotate_integrated.py index 4de6ce9..e1de142 100644 --- a/src/singler/annotate_integrated.py +++ b/src/singler/annotate_integrated.py @@ -49,37 +49,21 @@ def annotate_integrated( - A ``SummarizedExperiment`` object containing such a matrix in its assays. ref_labels: - Sequence of the same length as ``ref_data``, where the contents - depend on the type of value in the corresponding entry of ``ref_data``: - - - If ``ref_data[i]`` is a matrix-like object, ``ref_labels[i]`` should be - a sequence of length equal to the number of columns of ``ref_data[i]``, - containing the label associated with each column. - - If ``ref_data[i]`` is a ``SummarizedExperiment``, ``ref_labels[i]`` - may be a string speciying the column name in `column_data` that contains the - features. 
It can also be set to ``None`` to use the row names of the - experiment as features. + Sequence of the same length as ``ref_data``. The ``i``-th entry + should be a sequence of length equal to the number of columns of + ``ref_data[i]``, containing the label associated with each column. test_features: - Sequence of length equal to the number of rows in - ``test_data``, containing the feature identifier for each row. - - Alternatively, if ``test_data`` is a ``SummarizedExperiment``, - ``test_features`` may be a string speciying the column name in - `row_data` that contains the features. It can also be set to - ``None`` to use the row names of the experiment as features. + Sequence of length equal to the number of rows in ``test_data``, + containing the feature identifier for each row. Alternatively + ``None``, to use the row names of the experiment as features. ref_features: - Sequence of the same length as ``ref_data``, where the contents - depend on the type of value in the corresponding entry of ``ref_data``: - - - If ``ref_data[i]`` is a matrix-like object, ``ref_features[i]`` should be - a sequence of length equal to the number of rows of ``ref_data[i]``, - containing the feature identifier associated with each row. - - If ``ref_data[i]`` is a ``SummarizedExperiment``, - ``ref_features[i]`` may be a string speciying the column name in - `row_data` that contains the features. It can also be set to - ``None`` to use the row names of the experiment as features. + Sequence of the same length as ``ref_data``. The ``i``-th entry + should be a sequence of length equal to the number of rows of + ``ref_data[i]``, containing the feature identifier associated with + each row. It can also be set to ``None`` to use the row names of + the experiment as features. 
This can also be ``None`` to indicate that the row names should be used for all references, assuming ``ref_data`` only contains diff --git a/src/singler/annotate_single.py b/src/singler/annotate_single.py index 8f1da09..11914c6 100644 --- a/src/singler/annotate_single.py +++ b/src/singler/annotate_single.py @@ -60,7 +60,7 @@ def annotate_single( ref_features: Sequence of length equal to the number of rows of ``ref_data``, containing the feature identifier for each row. Alternatively - ``None``, to use the `row_names` of the experiment as features. + ``None``, to use the row names of the experiment as features. test_assay_type: Assay containing the expression matrix, if ``test_data`` is a diff --git a/src/singler/classify_integrated.py b/src/singler/classify_integrated.py index c73a1a9..0ee2009 100644 --- a/src/singler/classify_integrated.py +++ b/src/singler/classify_integrated.py @@ -35,16 +35,16 @@ def classify_integrated( results: List of classification results generated by running - :py:meth:`~singler.classify_single.classify_single` on + :py:func:`~singler.classify_single.classify_single` on ``test_data`` with each reference. References should be ordered as in ``integrated_prebuilt.reference_names``. integrated_prebuilt: Integrated reference object, constructed with - :py:meth:`~singler.train_integrated.train_integrated`. + :py:func:`~singler.train_integrated.train_integrated`. assay_type: - Assay containing the expression matrix, if `test_data` is a + Assay containing the expression matrix, if ``test_data`` is a :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`. 
quantile: @@ -70,9 +70,9 @@ def classify_integrated( ``best_label`` across all references, defined as the assigned label in the best reference; the identity of the ``best_reference``, either as a name string or an integer index; the ``scores`` for the best label in - each reference, as a nested BiocFrame of BiocFrames; and the ``delta`` - from the best to the second-best reference. Each row corresponds to a - column of ``test``. + each reference, as a nested ``BiocFrame``; and the ``delta`` from the + best to the second-best reference. Each row corresponds to a column of + ``test_data``. """ if isinstance(test_data, summarizedexperiment.SummarizedExperiment): test_data = test_data.assay(assay_type) diff --git a/src/singler/classify_single.py b/src/singler/classify_single.py index fa79ffa..aacbe51 100644 --- a/src/singler/classify_single.py +++ b/src/singler/classify_single.py @@ -33,10 +33,10 @@ def classify_single( ref_prebuilt: A pre-built reference created with - :py:meth:`~singler.build_single_reference.build_single_reference`. + :py:func:`~singler.train_single.train_single`. assay_type: - Assay containing the expression matrix, if `test_data` is a + Assay containing the expression matrix, if ``test_data`` is a :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`. quantile: @@ -57,7 +57,7 @@ def classify_single( Returns: A :py:class:`~BiocFrame.BiocFrame.BiocFrame` containing the ``best`` - label, the ``scores`` for each label (as a nested BiocFrame), and the + label, the ``scores`` for each label as a nested ``BiocFrame``, and the ``delta`` from the best to the second-best label. Each row corresponds to a column of ``test``. 
The metadata contains ``markers``, a list of the markers from each pairwise comparison between labels; and ``used``, diff --git a/src/singler/get_classic_markers.py b/src/singler/get_classic_markers.py index 3ca7f2a..1e6a54c 100644 --- a/src/singler/get_classic_markers.py +++ b/src/singler/get_classic_markers.py @@ -101,40 +101,51 @@ def get_classic_markers( Args: ref_data: - A matrix-like object containing the log-normalized expression values of a reference dataset. - Each column is a sample and each row is a feature. + A matrix-like object containing the log-normalized expression + values of a reference dataset. Each column is a sample and each + row is a feature. - Alternatively, this can be a :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment` + Alternatively, this can be a + :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment` containing a matrix-like object in one of its assays. - Alternatively, a list of such matrices or ``SummarizedExperiment`` objects, - typically for multiple batches of the same reference; - it is assumed that different batches exhibit at least some overlap in their ``features`` and ``labels``. + Alternatively, a list of such matrices or ``SummarizedExperiment`` + objects, typically for multiple batches of the same reference; it + is assumed that different batches exhibit at least some overlap in + their ``ref_features`` and ``ref_labels``. ref_labels: - A sequence of length equal to the number of columns of ``ref``, + If ``ref_data`` is not a list, ``ref_labels`` should be a sequence + of length equal to the number of columns of ``ref_data``, containing a label (usually a string) for each column. - Alternatively, a list of such sequences of length equal to that of a list ``ref``; - each sequence should have length equal to the number of columns of the corresponding entry of ``ref``. + + If ``ref_data`` is a list, ``ref_labels`` should also be a list of + the same length. 
Each entry should be a sequence of length equal to + the number of columns of the corresponding entry of ``ref_data``. ref_features: - A sequence of length equal to the number of rows of ``ref``, + If ``ref_data`` is not a list, ``ref_features`` should be a + sequence of length equal to the number of rows of ``ref_data``, containing the feature name (usually a string) for each row. - Alternatively, a list of such sequences of length equal to that of a list ``ref``; - each sequence should have length equal to the number of rows of the corresponding entry of ``ref``. + + If ``ref_data`` is a list, ``ref_features`` should also be a list + of the same length. Each entry should be a sequence of length + equal to the number of rows of the corresponding entry of ``ref_data``. assay_type: - Name or index of the assay containing the assay of interest, - if ``ref`` is or contains - :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment` objects. + Name or index of the assay of interest, if ``ref_data`` is or contains + :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment` + objects. check_missing: - Whether to check for and remove rows with missing (NaN) values in the reference matrices. - This can be set to False if it is known that no NaN values exist. + Whether to check for and remove rows with missing (NaN) values in + the reference matrices. This can be set to False if it is known + that no NaN values exist. num_de: - Number of differentially expressed genes to use as markers for each pairwise comparison between labels. - If None, an appropriate number of genes is automatically determined. + Number of differentially expressed genes to use as markers for each + pairwise comparison between labels. If ``None``, an appropriate + number of genes is automatically determined. num_threads: Number of threads to use for the calculations. 
diff --git a/src/singler/train_integrated.py b/src/singler/train_integrated.py index e4bcc46..600e1b0 100644 --- a/src/singler/train_integrated.py +++ b/src/singler/train_integrated.py @@ -22,7 +22,7 @@ def __init__(self, ptr, ref_names, ref_labels): @property def reference_names(self) -> Union[Sequence[str], None]: """Sequence containing the names of the references. Alternatively - None, if no names were supplied.""" + ``None``, if no names were supplied.""" return self._names @property @@ -30,7 +30,7 @@ def reference_labels(self) -> list: """List of lists containing the names of the labels for each reference. Each entry corresponds to a reference in :py:attr:`~reference_names`, - if ``reference_names`` is not None. + if ``reference_names`` is not ``None``. """ return self._labels @@ -50,10 +50,10 @@ def train_integrated( ref_prebuilt: List of prebuilt references, typically created by calling - :py:meth:`~singler.build_single_reference.train_single`. + :py:func:`~singler.train_single.train_single`. ref_names: - Sequence of names for the references. If None, these are + Sequence of names for the references. If ``None``, these are automatically generated. warn_lost: diff --git a/src/singler/train_single.py b/src/singler/train_single.py index 5bb2834..20c9de7 100644 --- a/src/singler/train_single.py +++ b/src/singler/train_single.py @@ -13,8 +13,8 @@ class TrainedSingleReference: """A prebuilt reference object, typically created by - :py:meth:`~singler.build_single_reference.build_single_reference`. This is intended for advanced users only and - should not be serialized. + :py:func:`~singler.train_single.train_single`. This is intended for + advanced users only and should not be serialized. """ def __init__( @@ -110,7 +110,7 @@ def train_single( ref_data: A matrix-like object where rows are features, columns are reference profiles, and each entry is the expression value. 
- If `markers` is not provided, expression should be normalized + If ``markers`` is not provided, expression should be normalized and log-transformed in preparation for marker prioritization via differential expression analyses. Otherwise, any expression values are acceptable as only the ranking within each column is used. @@ -130,7 +130,7 @@ def train_single( assay_type: Assay containing the expression matrix, - if `ref_data` is a + if ``ref_data`` is a :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`. check_missing: @@ -139,8 +139,8 @@ def train_single( restrict_to: Subset of available features to restrict to. Only features in - ``restrict_to`` will be used in the reference building. If None, - no restriction is performed. + ``restrict_to`` will be used in the reference building. If + ``None``, no restriction is performed. markers: Upregulated markers for each pairwise comparison between labels. @@ -159,9 +159,9 @@ def train_single( Further arguments to pass to the chosen marker detection method. Only used if ``markers`` is not supplied. - approximate: - Whether to use an approximate neighbor search to compute scores - during classification. + nn_parameters: + Algorithm for constructing the neighbor search index, used to + compute scores during classification. num_threads: Number of threads to use for reference building.