Commit
Merge branch 'py38-eol' of https://github.com/singler-inc/singler-py into py38-eol
jkanche committed Dec 13, 2024
2 parents d4d6ad3 + 7eb1e68 commit fd59c61
Showing 46 changed files with 1,524 additions and 2,116 deletions.
60 changes: 17 additions & 43 deletions README.md
@@ -17,7 +17,7 @@

## Overview

- This package provides Python bindings to the [C++ implementation](https://github.com/LTLA/singlepp) of the [SingleR algorithm](https://github.com/LTLA/SingleR),
+ This package provides Python bindings to the [C++ implementation](https://github.com/SingleR-inc/singlepp) of the [SingleR algorithm](https://github.com/SingleR-inc/SingleR),
originally developed by [Aran et al. (2019)](https://www.nature.com/articles/s41590-018-0276-y).
It is designed to annotate cell types by matching cells to known references based on their expression profiles.
So kind of like Tinder, but for cells.
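
For intuition, the scoring idea from the Aran et al. paper can be sketched in a few lines: each cell is correlated (Spearman) against the reference profiles of every label over a set of marker genes, the per-label score is a high quantile of those correlations, and the best-scoring label wins. This is only an illustration of the algorithm, not this package's implementation (the real work happens in the C++ layer):

```python
# Illustrative sketch of the SingleR scoring idea; not this package's code.
import numpy as np
from scipy.stats import spearmanr

def score_cell(cell_expr, ref_profiles_by_label, quantile=0.8):
    """Assign a label to one cell; profiles are genes x samples arrays per label."""
    scores = {}
    for label, profiles in ref_profiles_by_label.items():
        # correlation of the cell with every reference sample of this label
        cors = [spearmanr(cell_expr, profiles[:, j])[0] for j in range(profiles.shape[1])]
        # the per-label score is a high quantile of those correlations
        scores[label] = float(np.quantile(cors, quantile))
    return max(scores, key=scores.get)
```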
@@ -62,7 +62,7 @@ results = singler.annotate_single(
test_data = mat,
test_features = features,
ref_data = ref_data,
ref_labels = "label.main",
ref_labels = ref_data.get_column_data().column("label.main"),
)
```
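
The call above should produce a per-cell annotation table. A rough sketch of how it might be inspected follows; the `best` column name and the BiocFrame-style `.column()` accessor are assumptions here, not something shown in this diff:

```python
# Hypothetical inspection of the annotation table; assumes `results` behaves
# like a BiocFrame with one row per cell and an assigned-label column ("best").
from collections import Counter

labels = list(results.column("best"))  # column name is an assumption
print(Counter(labels).most_common(5))  # the five most frequent cell types
```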

@@ -94,23 +94,19 @@ Advanced users may prefer to build the reference and run the classification separately.
This allows us to re-use the same reference for multiple datasets without repeating the build step.

```python
- built = singler.build_single_reference(
- ref_data=ref_data.assay("logcounts"),
- ref_labels=ref_data.col_data.column("label.main"),
- ref_features=ref_data.get_row_names(),
- restrict_to=features,
+ built = singler.train_single(
+ ref_data = ref_data.assay("logcounts"),
+ ref_labels = ref_data.get_column_data().column("label.main"),
+ ref_features = ref_data.get_row_names(),
+ test_features = features,
)
```

And finally, we apply the pre-built reference to the test dataset to obtain our label assignments.
- This can be repeated with different datasets that have the same features or a superset of `features`.
+ This can be repeated with different datasets that have the same features as `test_features=`.

```python
- output = singler.classify_single_reference(
- mat,
- test_features=features,
- ref_prebuilt=built,
- )
+ output = singler.classify_single(mat, ref_prebuilt=built)
```
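
Because training is decoupled from classification, the same `built` object can be reused for any dataset that shares the `test_features` ordering. A minimal sketch of that reuse, with the extra matrices (`mat1`, `mat2`) assumed to exist:

```python
# Hypothetical reuse of one trained reference across several test matrices
# that share the same feature space as `test_features=` above.
other_datasets = {"batch1": mat1, "batch2": mat2}
outputs = {
    name: singler.classify_single(m, ref_prebuilt=built)
    for name, m in other_datasets.items()
}
```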

## output
@@ -134,21 +130,25 @@ import singler
import celldex

blueprint_ref = celldex.fetch_reference("blueprint_encode", "2024-02-26", realize_assays=True)

immune_cell_ref = celldex.fetch_reference("dice", "2024-02-26", realize_assays=True)

single_results, integrated = singler.annotate_integrated(
mat,
features,
- ref_data_list = (blueprint_ref, immune_cell_ref),
- ref_labels_list = "label.main",
+ ref_data = [
+ blueprint_ref,
+ immune_cell_ref
+ ],
+ ref_labels = [
+ blueprint_ref.get_column_data().column("label.main"),
+ immune_cell_ref.get_column_data().column("label.main")
+ ],
num_threads = 6
)
```

This annotates the test dataset against each reference individually to obtain the best per-reference label,
and then it compares across references to find the best label from all references.
Both the single and integrated annotations are reported for diagnostics.

```python
integrated.column("best_label")
@@ -174,29 +174,3 @@ integrated.column("best_reference")
## ...
##]
```
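
As a small follow-up, the per-cell reference assignments shown above can be tallied directly from the `best_reference` column; the tallying itself is just an illustration:

```python
# Count how many cells end up assigned to each reference.
from collections import Counter

print(Counter(integrated.column("best_reference")))
```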

- ## Developer notes
-
- Build the shared object file:
-
- ```shell
- python setup.py build_ext --inplace
- ```
-
- For quick testing:
-
- ```shell
- pytest
- ```
-
- For more complex testing:
-
- ```shell
- python setup.py build_ext --inplace && tox
- ```
-
- To rebuild the **ctypes** bindings with [**cpptypes**](https://github.com/BiocPy/ctypes-wrapper):
-
- ```shell
- cpptypes src/singler/lib --py src/singler/_cpphelpers.py --cpp src/singler/lib/bindings.cpp --dll _core
- ```
2 changes: 1 addition & 1 deletion docs/conf.py
@@ -43,7 +43,7 @@
try:
import sphinx

cmd_line = f"sphinx-apidoc --implicit-namespaces -f -o {output_dir} {module_dir}"
cmd_line = f"sphinx-apidoc -M --implicit-namespaces -f -o {output_dir} {module_dir} {module_dir}/lib_*"

args = cmd_line.split(" ")
if tuple(sphinx.__version__.split(".")) >= ("1", "7"):
18 changes: 1 addition & 17 deletions docs/index.md
@@ -1,25 +1,9 @@
# singler

- Add a short description here!
-
-
- ## Note
-
- > This is the main page of your project's [Sphinx] documentation. It is
- > formatted in [Markdown]. Add additional pages by creating md-files in
- > `docs` or rst-files (formatted in [reStructuredText]) and adding links to
- > them in the `Contents` section below.
- >
- > Please check [Sphinx] and [MyST] for more information
- > about how to document your project and how to configure your preferences.
-
## Contents

```{toctree}
:maxdepth: 2
- Overview <readme>
+ Usage <readme>
Contributions & Help <contributing>
License <license>
Authors <authors>
30 changes: 30 additions & 0 deletions lib/CMakeLists.txt
@@ -0,0 +1,30 @@
cmake_minimum_required(VERSION 3.24)

project(singler
VERSION 1.0.0
DESCRIPTION "Building the singler shared library"
LANGUAGES CXX)

# Defining the targets.
find_package(pybind11 CONFIG)

# pybind11 method:
pybind11_add_module(singler
src/find_classic_markers.cpp
src/train_single.cpp
src/classify_single.cpp
src/train_integrated.cpp
src/classify_integrated.cpp
src/init.cpp
)

target_include_directories(singler PRIVATE "${ASSORTHEAD_INCLUDE_DIR}")

set_property(TARGET singler PROPERTY CXX_STANDARD 17)

target_link_libraries(singler PRIVATE pybind11::pybind11)

set_target_properties(singler PROPERTIES
OUTPUT_NAME lib_singler
PREFIX ""
)
63 changes: 63 additions & 0 deletions lib/src/classify_integrated.cpp
@@ -0,0 +1,63 @@
#include "def.h"
#include "utils.h"

#include "singlepp/singlepp.hpp"
#include "tatami/tatami.hpp"
#include "pybind11/pybind11.h"

#include <vector>
#include <cstdint>
#include <stdexcept>

pybind11::tuple classify_integrated(
const MatrixPointer& test,
const pybind11::list& results,
const TrainedIntegratedPointer& integrated_build,
double quantile,
bool use_fine_tune,
double fine_tune_threshold,
int nthreads)
{
// Setting up the previous results.
size_t num_refs = results.size();
std::vector<const uint32_t*> previous_results;
previous_results.reserve(num_refs);
for (size_t r = 0; r < num_refs; ++r) {
const auto& curres = results[r].cast<pybind11::array>();
previous_results.push_back(check_numpy_array<uint32_t>(curres));
}

// Setting up outputs.
size_t ncells = test->ncol();
pybind11::array_t<MatrixIndex> best(ncells);
pybind11::array_t<MatrixValue> delta(ncells);

singlepp::ClassifyIntegratedBuffers<MatrixIndex, MatrixValue> buffers;
buffers.best = static_cast<MatrixIndex*>(best.request().ptr);
buffers.delta = static_cast<MatrixValue*>(delta.request().ptr);

pybind11::list scores(num_refs);
buffers.scores.resize(num_refs);
for (size_t l = 0; l < num_refs; ++l) {
scores[l] = pybind11::array_t<MatrixValue>(ncells);
buffers.scores[l] = static_cast<MatrixValue*>(scores[l].cast<pybind11::array>().request().ptr);
}

// Running the integrated scoring.
singlepp::ClassifyIntegratedOptions<double> opts;
opts.num_threads = nthreads;
opts.quantile = quantile;
opts.fine_tune = use_fine_tune;
opts.fine_tune_threshold = fine_tune_threshold;
singlepp::classify_integrated(*test, previous_results, *integrated_build, buffers, opts);

pybind11::tuple output(3);
output[0] = best;
output[1] = scores;
output[2] = delta;
return output;
}

void init_classify_integrated(pybind11::module& m) {
m.def("classify_integrated", &classify_integrated);
}
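
For orientation, the tuple returned by this binding pairs up as (best reference per cell, one score array per reference, per-cell delta between the top two references). A hypothetical Python-side repackaging, purely to illustrate that structure:

```python
# Hypothetical unpacking of the tuple produced by the classify_integrated
# binding; `outputs` is assumed to be the raw return value of that call.
def unpack_integrated(outputs):
    best, scores, delta = outputs
    return {
        "best_reference": best,  # index of the winning reference per cell
        "scores": {i: s for i, s in enumerate(scores)},  # per-reference score arrays
        "delta": delta,  # gap between the best and runner-up reference scores
    }
```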
47 changes: 47 additions & 0 deletions lib/src/classify_single.cpp
@@ -0,0 +1,47 @@
#include "def.h"
#include "utils.h"

#include "singlepp/singlepp.hpp"
#include "tatami/tatami.hpp"
#include "pybind11/pybind11.h"

#include <vector>
#include <cstdint>
#include <stdexcept>

pybind11::tuple classify_single(const MatrixPointer& test, const TrainedSingleIntersectPointer& built, double quantile, bool use_fine_tune, double fine_tune_threshold, int nthreads) {
// Setting up outputs.
size_t ncells = test->ncol();
pybind11::array_t<MatrixIndex> best(ncells);
pybind11::array_t<MatrixValue> delta(ncells);

singlepp::ClassifySingleBuffers<MatrixIndex, MatrixValue> buffers;
buffers.best = static_cast<MatrixIndex*>(best.request().ptr);
buffers.delta = static_cast<MatrixValue*>(delta.request().ptr);

size_t nlabels = built->num_labels();
pybind11::list scores(nlabels);
buffers.scores.resize(nlabels);
for (size_t l = 0; l < nlabels; ++l) {
scores[l] = pybind11::array_t<MatrixValue>(ncells);
buffers.scores[l] = static_cast<MatrixValue*>(scores[l].cast<pybind11::array>().request().ptr);
}

// Running the analysis.
singlepp::ClassifySingleOptions opts;
opts.num_threads = nthreads;
opts.quantile = quantile;
opts.fine_tune = use_fine_tune;
opts.fine_tune_threshold = fine_tune_threshold;
singlepp::classify_single_intersect(*test, *built, buffers, opts);

pybind11::tuple output(3);
output[0] = best;
output[1] = scores;
output[2] = delta;
return output;
}

void init_classify_single(pybind11::module& m) {
m.def("classify_single", &classify_single);
}
20 changes: 20 additions & 0 deletions lib/src/def.h
@@ -0,0 +1,20 @@
#ifndef DEF_H
#define DEF_H

#include <cstdint>
#include <memory>
#include "tatami/tatami.hpp"
#include "singlepp/singlepp.hpp"

typedef uint32_t MatrixIndex;
typedef double MatrixValue;
typedef std::shared_ptr<tatami::Matrix<MatrixValue, MatrixIndex> > MatrixPointer;

typedef std::shared_ptr<knncolle::Builder<knncolle::SimpleMatrix<uint32_t, uint32_t, double>, double> > BuilderPointer;

typedef singlepp::TrainedSingleIntersect<MatrixIndex, MatrixValue> TrainedSingleIntersect;
typedef std::shared_ptr<TrainedSingleIntersect> TrainedSingleIntersectPointer;
typedef singlepp::TrainedIntegrated<MatrixIndex> TrainedIntegrated;
typedef std::shared_ptr<TrainedIntegrated> TrainedIntegratedPointer;

#endif
64 changes: 64 additions & 0 deletions lib/src/find_classic_markers.cpp
@@ -0,0 +1,64 @@
#include "def.h"
#include "utils.h"

#include "singlepp/singlepp.hpp"
#include "tatami/tatami.hpp"
#include "pybind11/pybind11.h"

#include <vector>
#include <cstdint>
#include <stdexcept>

pybind11::list find_classic_markers(uint32_t num_labels, uint32_t num_genes, const pybind11::list& reference, const pybind11::list& labels, int de_n, int nthreads) {
size_t num_ref = reference.size();
if (num_ref != static_cast<size_t>(labels.size())) {
throw std::runtime_error("'ref' and 'labels' should have the same length");
}

std::vector<const tatami::Matrix<MatrixValue, MatrixIndex>*> ref_ptrs;
ref_ptrs.reserve(num_ref);
std::vector<const uint32_t*> lab_ptrs;
lab_ptrs.reserve(num_ref);

for (size_t r = 0; r < num_ref; ++r) {
auto ptr = reference[r].cast<MatrixPointer>().get();
ref_ptrs.emplace_back(ptr);
if (ptr->nrow() != num_genes) {
throw std::runtime_error("each entry of 'ref' should have number of rows equal to 'ngenes'");
}

// No copy, so it's fine to create a pointer and discard the casted array.
auto lab = labels[r].cast<pybind11::array>();
if (lab.size() != static_cast<size_t>(ptr->ncol())) {
throw std::runtime_error("number of columns in each 'ref' should equal the length of the corresponding entry of 'labels'");
}

lab_ptrs.push_back(check_numpy_array<uint32_t>(lab));
}

singlepp::ChooseClassicMarkersOptions opts;
opts.number = de_n;
opts.num_threads = nthreads;
auto store = singlepp::choose_classic_markers(ref_ptrs, lab_ptrs, opts);

pybind11::list output(num_labels);
for (uint32_t l = 0; l < num_labels; ++l) {
const auto& src = store[l];
pybind11::list dest(num_labels);
for (uint32_t l2 = 0; l2 < num_labels; ++l2) {
dest[l2] = pybind11::array_t<MatrixIndex>(src[l2].size(), src[l2].data());
}
output[l] = dest;
}

return output;
}

uint32_t number_of_classic_markers(uint32_t num_labels) {
return singlepp::number_of_classic_markers(num_labels);
}

void init_find_classic_markers(pybind11::module& m) {
m.def("find_classic_markers", &find_classic_markers);
m.def("number_of_classic_markers", &number_of_classic_markers);
}
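
The nested list returned here is assumed to follow the usual singlepp convention, where entry `[i][j]` holds indices of genes upregulated in label `i` relative to label `j`. A hypothetical helper for collapsing those pairwise lists into one marker set per label:

```python
# Hypothetical consolidation of the nested marker lists from find_classic_markers:
# take the union of markers for `label` against every other label.
def union_of_markers(markers, label):
    out = set()
    for other, idx in enumerate(markers[label]):
        if other != label:  # the diagonal entry is typically empty anyway
            out.update(int(i) for i in idx)
    return sorted(out)
```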
19 changes: 19 additions & 0 deletions lib/src/init.cpp
@@ -0,0 +1,19 @@
#include "def.h"
#include "pybind11/pybind11.h"

void init_find_classic_markers(pybind11::module&);
void init_train_single(pybind11::module&);
void init_classify_single(pybind11::module&);
void init_train_integrated(pybind11::module&);
void init_classify_integrated(pybind11::module&);

PYBIND11_MODULE(lib_singler, m) {
init_find_classic_markers(m);
init_train_single(m);
init_classify_single(m);
init_train_integrated(m);
init_classify_integrated(m);

pybind11::class_<TrainedSingleIntersect, TrainedSingleIntersectPointer>(m, "TrainSingleIntersect");
pybind11::class_<TrainedIntegrated, TrainedIntegratedPointer>(m, "TrainIntegrated");
}
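
A quick smoke test of the bindings registered above could look like the sketch below; the import path `singler.lib_singler` is an assumption based on the CMake `OUTPUT_NAME`, not something shown in this commit:

```python
# Hypothetical check that the pybind11 module exposes the functions whose
# m.def(...) registrations appear in this diff.
from singler import lib_singler  # assumed import location of the built module

expected = {"find_classic_markers", "number_of_classic_markers",
            "classify_single", "classify_integrated"}
print(expected.issubset(dir(lib_singler)))  # True if the module built correctly
```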