diff --git a/README.md b/README.md
index cee48d7..acea211 100644
--- a/README.md
+++ b/README.md
@@ -28,22 +28,28 @@ Firstly, let's load in the famous PBMC 4k dataset from 10X Genomics:
 
 ```python
 import singlecellexperiment as sce
-data = sce.read_tenx_h5("pbmc4k-tenx.h5")
+data = sce.read_tenx_h5("pbmc4k-tenx.h5", realize_assays=True)
 mat = data.assay("counts")
 features = [str(x) for x in data.row_data["name"]]
 ```
 
-Now we use the Blueprint/ENCODE reference to annotate each cell in `mat`:
+Now, we fetch the Blueprint/ENCODE reference:
+
+```python
+import celldex
+
+ref_data = celldex.fetch_reference("blueprint_encode", "2024-02-26", realize_assays=True)
+```
+
+We can annotate each cell in `mat` with the reference:
 
 ```python
 import singler
 results = singler.annotate_single(
-    mat,
-    features,
-    ref_data = "BlueprintEncode",
-    ref_features = "symbol",
-    ref_labels = "main",
-    cache_dir = "_cache"
+    test_data = mat,
+    test_features = features,
+    ref_data = ref_data,
+    ref_labels = "label.main",
 )
 ```
 
@@ -74,34 +80,12 @@ The `annotate_single()` function is a convenient wrapper around a number of lowe
 Advanced users may prefer to build the reference and run the classification separately.
 This allows us to re-use the same reference for multiple datasets without repeating the build step.
 
-We start by fetching the reference of interest from [GitHub](https://github.com/kanaverse/singlepp-references).
-Note the use of `cache_dir` to avoid repeated downloads from GitHub.
-
-```python
-ref = singler.fetch_github_reference("BlueprintEncode", cache_dir="_cache")
-```
-
-We'll be using the gene symbols here with the markers for the main labels.
-We need to set `restrict_to` to the features in our test data, so as to avoid picking marker genes in the reference that won't be present in the test.
-
-```python
-ref_features = ref.row_data.column("symbol")
-
-markers = singler.realize_github_markers(
-    ref.metadata["main"],
-    ref_features,
-    restrict_to=set(features),
-)
-```
-
-Now we build the reference from the ranked expression values and the associated labels in the reference:
-
 ```python
 built = singler.build_single_reference(
-    ref_data=ref.assay("ranks"),
-    ref_labels=ref.col_data.column("main"),
-    ref_features=ref_features,
-    markers=markers,
+    ref_data=ref_data.assay("logcounts"),
+    ref_labels=ref_data.col_data.column("label.main"),
+    ref_features=ref_data.get_row_names(),
+    restrict_to=features,
 )
 ```
 
@@ -134,14 +118,17 @@ We can use annotations from multiple references through the `annotate_integrated
 
 ```python
 import singler
+import celldex
+
+blueprint_ref = celldex.fetch_reference("blueprint_encode", "2024-02-26", realize_assays=True)
+
+immune_cell_ref = celldex.fetch_reference("dice", "2024-02-26", realize_assays=True)
+
 single_results, integrated = singler.annotate_integrated(
     mat,
     features,
-    ref_data_list = ("BlueprintEncode", "DatabaseImmuneCellExpression"),
-    ref_features_list= "symbol",
-    ref_labels_list = "main",
-    build_integrated_args = { "ref_names": ("Blueprint", "DICE") },
-    cache_dir = "_cache",
+    ref_data_list = (blueprint_ref, immune_cell_ref),
+    ref_labels_list = "label.main",
     num_threads = 6
 )
 ```
diff --git a/setup.cfg b/setup.cfg
index 59d2073..796ce89 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -54,6 +54,7 @@ install_requires =
     delayedarray
     biocframe>=0.5.0
     summarizedexperiment>=0.4.0
+    singlecellexperiment>=0.4.6
     biocutils
 
 [options.packages.find]
@@ -71,6 +72,9 @@ testing =
     setuptools
     pytest
     pytest-cov
+    celldex
+    scrnaseq
+    scipy
 
 [options.entry_points]
 # Add here console scripts like:
diff --git a/src/singler/__init__.py b/src/singler/__init__.py
index dc17568..8c512e7 100644
--- a/src/singler/__init__.py
+++ b/src/singler/__init__.py
@@ -16,11 +16,10 @@
     del version, PackageNotFoundError
 
 
-from .get_classic_markers import get_classic_markers, number_of_classic_markers
+from .annotate_integrated import annotate_integrated
+from .annotate_single import annotate_single
+from .build_integrated_references import IntegratedReferences, build_integrated_references
 from .build_single_reference import build_single_reference
-from .build_integrated_references import build_integrated_references, IntegratedReferences
-from .classify_single_reference import classify_single_reference
 from .classify_integrated_references import classify_integrated_references
-from .fetch_reference import fetch_github_reference, realize_github_markers
-from .annotate_single import annotate_single
-from .annotate_integrated import annotate_integrated
+from .classify_single_reference import classify_single_reference
+from .get_classic_markers import get_classic_markers, number_of_classic_markers
diff --git a/src/singler/_utils.py b/src/singler/_utils.py
index e6a4ed6..4e31b51 100644
--- a/src/singler/_utils.py
+++ b/src/singler/_utils.py
@@ -74,11 +74,18 @@ def _clean_matrix(x, features, assay_type, check_missing, num_threads):
         return x, features
 
     if isinstance(x, SummarizedExperiment):
+        if features is None:
+            features = x.get_row_names()
+        elif isinstance(features, str):
+            features = x.get_row_data().column(features)
+        features = list(features)
+
         x = x.assay(assay_type)
 
     curshape = x.shape
     if len(curshape) != 2:
         raise ValueError("each entry of 'ref' should be a 2-dimensional array")
+
     if curshape[0] != len(features):
         raise ValueError(
             "number of rows of 'x' should be equal to the length of 'features'"
diff --git a/src/singler/annotate_integrated.py b/src/singler/annotate_integrated.py
index fad6f42..afb38f4 100644
--- a/src/singler/annotate_integrated.py
+++ b/src/singler/annotate_integrated.py
@@ -3,7 +3,7 @@
 from biocframe import BiocFrame
 
 from ._utils import _clean_matrix
-from .annotate_single import _attach_markers, _resolve_reference
+from .annotate_single import _resolve_reference
 from .build_integrated_references import build_integrated_references
 from .build_single_reference import build_single_reference
 from .classify_integrated_references import classify_integrated_references
@@ -12,15 +12,14 @@
 
 def annotate_integrated(
     test_data: Any,
-    test_features: Sequence,
     ref_data_list: Sequence[Union[Any, str]],
-    ref_labels_list: Union[str, Sequence[Union[Sequence, str]]],
-    ref_features_list: Union[str, Sequence[Union[Sequence, str]]],
+    test_features: Optional[Union[Sequence, str]] = None,
+    ref_labels_list: Optional[Union[Optional[str], Sequence[Union[Sequence, str]]]] = None,
+    ref_features_list: Optional[Union[Optional[str], Sequence[Union[Sequence, str]]]] = None,
     test_assay_type: Union[str, int] = 0,
     test_check_missing: bool = True,
     ref_assay_type: Union[str, int] = "logcounts",
     ref_check_missing: bool = True,
-    cache_dir: Optional[str] = None,
     build_single_args: dict = {},
     classify_single_args: dict = {},
     build_integrated_args: dict = {},
@@ -45,6 +44,11 @@ def annotate_integrated(
             Sequence of length equal to the number of rows in
             ``test_data``, containing the feature identifier for each row.
 
+            Alternatively, if ``test_data`` is a ``SummarizedExperiment``, ``test_features``
+            may be a string specifying the column name in `row_data` that contains the
+            features. It can also be set to `None`, to use the `row_names` of the
+            experiment as features.
+
         ref_data_list:
             Sequence consisting of one or more of the following:
 
@@ -69,6 +73,10 @@ def annotate_integrated(
             - If ``ref_data_list[i]`` is a string, ``ref_labels_list[i]`` should be a string
               specifying the label type to use, e.g., "main", "fine", "ont".
               If a single string is supplied, it is recycled for all ``ref_data``.
+            - If ``ref_data_list[i]`` is a ``SummarizedExperiment``, ``ref_labels_list[i]``
+              may be a string specifying the column name in `column_data` that contains the
+              labels. It can also be set to `None`, to use the `column_names` of the
+              experiment as labels.
 
         ref_features_list:
             Sequence of the same length as ``ref_data_list``, where the contents
@@ -80,6 +88,10 @@ def annotate_integrated(
             - If ``ref_data_list[i]`` is a string, ``ref_features_list[i]`` should be a string
               specifying the feature type to use, e.g., "ensembl", "symbol".
               If a single string is supplied, it is recycled for all ``ref_data``.
+            - If ``ref_data_list[i]`` is a ``SummarizedExperiment``, ``ref_features_list[i]``
+              may be a string specifying the column name in `row_data` that contains the
+              features. It can also be set to `None`, to use the `row_names` of the
+              experiment as features.
 
         test_assay_type:
             Assay of ``test_data`` containing the expression matrix, if ``test_data`` is a
@@ -95,11 +107,6 @@ def annotate_integrated(
         ref_check_missing:
             Whether to check for and remove missing (i.e., NaN) values from the reference datasets.
 
-        cache_dir:
-            Path to a cache directory for downloading reference files, see
-            :py:meth:`~singler.fetch_reference.fetch_github_reference` for details.
-            Only used if ``ref_data`` is a string.
-
         build_single_args:
             Further arguments to pass to
             :py:meth:`~singler.build_single_reference.build_single_reference`.
@@ -128,18 +135,22 @@ def annotate_integrated(
         :py:meth:`~singler.classify_integrated_references.classify_integrated_references`).
     """
     nrefs = len(ref_data_list)
+
     if isinstance(ref_labels_list, str):
         ref_labels_list = [ref_labels_list] * nrefs
-    elif nrefs != len(ref_labels_list):
-        raise ValueError(
-            "'ref_data_list' and 'ref_labels_list' must be the same length"
-        )
+    elif ref_labels_list is None:
+        ref_labels_list = [None] * nrefs
+
+    if nrefs != len(ref_labels_list):
+        raise ValueError("'ref_data_list' and 'ref_labels_list' must be the same length")
+
     if isinstance(ref_features_list, str):
         ref_features_list = [ref_features_list] * nrefs
-    elif nrefs != len(ref_features_list):
-        raise ValueError(
-            "'ref_data_list' and 'ref_features_list' must be the same length"
-        )
+    elif ref_features_list is None:
+        ref_features_list = [None] * nrefs
+
+    if nrefs != len(ref_features_list):
+        raise ValueError("'ref_data_list' and 'ref_features_list' must be the same length")
 
     test_ptr, test_features = _clean_matrix(
         test_data,
@@ -157,13 +168,11 @@ def annotate_integrated(
     test_features_set = set(test_features)
 
     for r in range(nrefs):
-        curref_mat, curref_labels, curref_features, curref_markers = _resolve_reference(
+        curref_mat, curref_labels, curref_features = _resolve_reference(
             ref_data=ref_data_list[r],
             ref_labels=ref_labels_list[r],
             ref_features=ref_features_list[r],
-            cache_dir=cache_dir,
             build_args=build_single_args,
-            test_features_set=test_features_set,
         )
 
         curref_ptr, curref_features = _clean_matrix(
@@ -174,13 +183,12 @@ def annotate_integrated(
             num_threads=num_threads,
         )
 
-        bargs = _attach_markers(curref_markers, build_single_args)
         curbuilt = build_single_reference(
             ref_data=curref_ptr,
             ref_labels=curref_labels,
             ref_features=curref_features,
             restrict_to=test_features_set,
-            **bargs,
+            **build_single_args,
             num_threads=num_threads,
         )
 
diff --git a/src/singler/annotate_single.py b/src/singler/annotate_single.py
index 03be964..6b1ad36 100644
--- a/src/singler/annotate_single.py
+++ b/src/singler/annotate_single.py
@@ -1,57 +1,53 @@
-from copy import copy
+import warnings
 from typing import Any, Optional, Sequence, Union
 
 from biocframe import BiocFrame
+from summarizedexperiment import SummarizedExperiment
 
 from .build_single_reference import build_single_reference
 from .classify_single_reference import classify_single_reference
-from .fetch_reference import fetch_github_reference, realize_github_markers
 
 
-def _resolve_reference(
-    ref_data, ref_labels, ref_features, cache_dir, build_args, test_features_set
-):
-    if isinstance(ref_data, str):
-        ref = fetch_github_reference(ref_data, cache_dir=cache_dir)
-        ref_features = ref.row_data.column(ref_features)
+def _resolve_reference(ref_data, ref_labels, ref_features, build_args):
+    if isinstance(ref_data, SummarizedExperiment) or issubclass(type(ref_data), SummarizedExperiment):
+        if ref_features is None:
+            ref_features = ref_data.get_row_names()
+        elif isinstance(ref_features, str):
+            ref_features = ref_data.get_row_data().column(ref_features)
 
-        num_de = None
-        if "marker_args" in build_args:
-            marker_args = build_args["marker_args"]
-            if "num_de" in marker_args:
-                num_de = marker_args["num_de"]
+        ref_features = list(ref_features)
 
-        ref_markers = realize_github_markers(
-            ref.metadata[ref_labels],
-            ref_features,
-            num_markers=num_de,
-            restrict_to=test_features_set,
-        )
+        if ref_labels is None:
+            ref_labels = ref_data.get_column_names()
+        elif isinstance(ref_labels, str):
+            ref_labels = ref_data.get_column_data().column(ref_labels)
 
-        ref_data = ref.assay("ranks")
-        ref_labels = ref.col_data.column(ref_labels)
-    else:
-        ref_markers = None
+        ref_labels = list(ref_labels)
 
-    return ref_data, ref_labels, ref_features, ref_markers
+        try:
+            _default_asy = "logcounts"
+            if "assay_type" in build_args:
+                _default_asy = build_args["assay_type"]
 
+            ref_data = ref_data.assay(_default_asy)
+        except Exception as _:
+            raise ValueError(f"Reference dataset must contain log-normalized count ('{_default_asy}') assay.")
 
-def _attach_markers(markers, build_args):
-    if markers is not None and "markers" not in build_args:
-        tmp = copy(build_args)
-        tmp["markers"] = markers
-        print(tmp)
-        return tmp
-    return build_args
+    if ref_labels is None:
+        raise ValueError("'ref_labels' cannot be `None`.")
+
+    if ref_features is None:
+        raise ValueError("'ref_features' cannot be `None`.")
+
+    return ref_data, ref_labels, ref_features
 
 
 def annotate_single(
     test_data: Any,
-    test_features: Sequence,
     ref_data: Any,
-    ref_labels: Union[Sequence, str],
-    ref_features: Union[Sequence, str],
-    cache_dir: Optional[str] = None,
+    ref_labels: Optional[Union[Sequence, str]],
+    test_features: Optional[Union[Sequence, str]] = None,
+    ref_features: Optional[Union[Sequence, str]] = None,
     build_args: dict = {},
     classify_args: dict = {},
     num_threads: int = 1,
@@ -74,36 +70,41 @@ def annotate_single(
             Sequence of length equal to the number of rows in
             ``test_data``, containing the feature identifier for each row.
 
+            Alternatively, if ``test_data`` is a ``SummarizedExperiment``, ``test_features``
+            may be a string specifying the column name in `row_data` that contains the
+            features. It can also be set to `None`, to use the `row_names` of
+            the experiment as features.
+
         ref_data:
             A matrix-like object representing the reference dataset, where rows
             are features and columns are samples. Entries should be expression values,
             usually log-transformed (see comments for the ``ref`` argument in
             :py:meth:`~singler.build_single_reference.build_single_reference`).
 
-            Alternatively, a string that can be passed as ``name`` to
-            :py:meth:`~singler.fetch_reference.fetch_github_reference`.
-            This will use the specified dataset as the reference.
+            Alternatively, a
+            :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`
+            containing such a matrix in one of its assays. A non-default assay
+            can be selected via the ``assay_type`` entry of ``build_args``.
 
         ref_labels:
             If ``ref_data`` is a matrix-like object, ``ref_labels`` should be
             a sequence of length equal to the number of columns of ``ref_data``,
             containing the label associated with each column.
 
-            If ``ref_data`` is a string, ``ref_labels`` should be a string
-            specifying the label type to use, e.g., "main", "fine", "ont".
+            Alternatively, if ``ref_data`` is a ``SummarizedExperiment``,
+            ``ref_labels`` may be a string specifying the column name in `column_data`
+            that contains the labels, e.g., "label.main". It can also be set to `None`,
+            to use the `column_names` of the experiment as labels.
 
         ref_features:
             If ``ref_data`` is a matrix-like object, ``ref_features`` should be
             a sequence of length equal to the number of rows of ``ref_data``,
             containing the feature identifier associated with each row.
 
-            If ``ref_data`` is a string, ``ref_features`` should be a string
-            specifying the label type to use, e.g., "ensembl", "symbol".
-
-        cache_dir:
-            Path to a cache directory for downloading reference files, see
-            :py:meth:`~singler.fetch_reference.fetch_github_reference` for details.
-            Only used if ``ref_data`` is a string.
+            Alternatively, if ``ref_data`` is a ``SummarizedExperiment``,
+            ``ref_features`` may be a string specifying the column name in `row_data`
+            that contains the features. It can also be set to `None`, to use the
+            `row_names` of the experiment as features.
 
         build_args:
             Further arguments to pass to
@@ -123,24 +124,36 @@ def annotate_single(
         specifying the markers that were used for each pairwise comparison
         between labels; and a list of ``unique_markers`` across all labels.
     """
+
+    if isinstance(test_data, SummarizedExperiment):
+        if test_features is None:
+            test_features = test_data.get_row_names()
+        elif isinstance(test_features, str):
+            test_features = test_data.get_row_data().column(test_features)
+
+    if test_features is None:
+        raise ValueError("'test_features' cannot be `None`.")
+
     test_features_set = set(test_features)
+    if len(test_features_set) != len(test_features):
+        warnings.warn("'test_features' is not unique, subsetting test matrix...", UserWarning)
+        _idxs = [test_features.index(x) for x in test_features_set]
+        print("modifying test data")
+        test_data = test_data[_idxs,]
 
-    ref_data, ref_labels, ref_features, markers = _resolve_reference(
+    ref_data, ref_labels, ref_features = _resolve_reference(
         ref_data=ref_data,
         ref_labels=ref_labels,
         ref_features=ref_features,
-        cache_dir=cache_dir,
         build_args=build_args,
-        test_features_set=test_features_set,
     )
 
-    bargs = _attach_markers(markers, build_args)
     built = build_single_reference(
         ref_data=ref_data,
         ref_labels=ref_labels,
         ref_features=ref_features,
         restrict_to=test_features_set,
-        **bargs,
+        **build_args,
         num_threads=num_threads,
     )
 
diff --git a/src/singler/classify_single_reference.py b/src/singler/classify_single_reference.py
index 4643e44..fa233f0 100644
--- a/src/singler/classify_single_reference.py
+++ b/src/singler/classify_single_reference.py
@@ -37,11 +37,16 @@ def classify_single_reference(
             Sequence of identifiers for each feature in the test
             dataset, i.e., row in ``test_data``.
 
+            If ``test_data`` is a ``SummarizedExperiment``, ``test_features``
+            may be a string specifying the column name in `row_data` that contains the
+            features. Alternatively, it can be set to `None` to use the `row_names` of
+            the experiment as features.
+
         ref_prebuilt:
             A pre-built reference created with
             :py:meth:`~singler.build_single_reference.build_single_reference`.
 
-        assay_type: 
+        assay_type:
             Assay containing the expression matrix,
             if `test_data` is a
             :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`.
diff --git a/src/singler/fetch_reference.py b/src/singler/fetch_reference.py
deleted file mode 100644
index 7449364..0000000
--- a/src/singler/fetch_reference.py
+++ /dev/null
@@ -1,243 +0,0 @@
-import gzip
-import os
-import tempfile
-import urllib.parse
-import urllib.request as req
-from typing import Any, Literal, Optional, Sequence, Union
-
-import biocframe
-import numpy
-import summarizedexperiment
-
-SESSION_DIR = None
-
-KNOWN_REFERENCE = Literal[
-    "BlueprintEncode",
-    "DatabaseImmuneCellExpression",
-    "HumanPrimaryCellAtlas",
-    "MonacoImmune",
-    "NovershternHematopoietic",
-    "ImmGen",
-    "MouseRNAseq",
-]
-
-
-def fetch_github_reference(
-    name: KNOWN_REFERENCE, cache_dir: Optional[str] = None, multiple_ids: bool = False
-) -> summarizedexperiment.SummarizedExperiment:
-    """Fetch a reference dataset from the
-    `pre-compiled GitHub registry <https://github.com/kanaverse/singlepp-references>`_,
-    for use in annotation with other **singler** functions.
-
-    Args:
-        name: Name of the reference dataset.
-
-        cache_dir: Path to a cache directory in which to store
-            the files downloaded from the remote. If the files are already
-            present, the download is skipped.
-
-        multiple_ids: Whether to report multiple feature IDs.
-            If True, each feature is represented by a list with zero,
-            one or more feature identifiers (e.g., for ambiguous mappings).
-            If False, each feature is represented by a string or None.
-
-    Returns:
-        The reference dataset as a SummarizedExperiment,
-        parts of which can be passed to :py:meth:`~singler.build_single_reference.build_single_reference`.
-
-    Specifically, the ``ranks`` assay of the output can be used as ``ref`` in
-    :py:meth:`~singler.build_single_reference.build_single_reference`;
-    one of the labels in the column data can be used as ``labels``;
-    and one of the gene types in the row data can be used as ``features``.
-
-    As the ranks are not log-normalized values, users should also use
-    the relevant pre-computed marker list in the metadata. The selected
-    marker list should match up with the chosen set of ``labels``. In
-    addition, the markers are stored as row indices and need to be converted
-    to feature identifiers; this is achieved by passing the marker list to
-    :py:meth:`~singler.fetch_reference.realize_github_markers` with the same
-    gene types that were used in ``features``. The output can then be passed
-    as ``markers`` in the `build_reference()` call.
-
-    If ``multiple_ids = True``, each ``row_data`` column will be a list of
-    lists of possible identifiers for each feature. Callers are responsible for
-    resolving this list of lists into a list of single identifiers for each
-    feature, before passing it onto other functions like
-    :py:meth:`~singler.build_single_reference.build_single_reference`.
-    """
-
-    all_files = {"matrix": name + "_matrix.csv.gz"}
-    gene_types = ["ensembl", "entrez", "symbol"]
-    for g in gene_types:
-        suff = "genes_" + g
-        all_files[suff] = name + "_" + suff + ".csv.gz"
-
-    lab_types = ["fine", "main", "ont"]
-    for lab in lab_types:
-        suff = "labels_" + lab
-        all_files[suff] = name + "_" + suff + ".csv.gz"
-        suff = "label_names_" + lab
-        all_files[suff] = name + "_" + suff + ".csv.gz"
-        suff = "markers_" + lab
-        all_files[suff] = name + "_" + suff + ".gmt.gz"
-
-    base_url = (
-        "https://github.com/kanaverse/singlepp-references/releases/download/2023-04-28"
-    )
-
-    if cache_dir is None:
-        global SESSION_DIR
-        # This should already lie inside the OS's temporary directory, based on
-        # documentation for tempfile.gettempdir(); no need to clean it up afterwards.
-        if SESSION_DIR is None:
-            SESSION_DIR = tempfile.mkdtemp()
-        cache_dir = SESSION_DIR
-    elif not os.path.exists(cache_dir):
-        os.makedirs(cache_dir)
-
-    all_paths = {}
-    for k, v in all_files.items():
-        url = base_url + "/" + v
-        path = os.path.join(cache_dir, urllib.parse.quote(url, safe=""))
-        if not os.path.exists(path):
-            req.urlretrieve(url=url, filename=path)
-        all_paths[k] = path
-
-    # Reading in labels.
-    labels = {}
-    markers = {}
-    for lab in lab_types:
-        all_labels = []
-        with gzip.open(all_paths["labels_" + lab], "rt") as handle:
-            for line in handle:
-                line = line.strip()
-                if line == "NA":  # I dunno man, I dunno.
-                    all_labels.append(None)
-                else:
-                    all_labels.append(int(line))
-
-        all_label_names = []
-        with gzip.open(all_paths["label_names_" + lab], "rt") as handle:
-            for line in handle:
-                all_label_names.append(line.strip())
-
-        for i, x in enumerate(all_labels):
-            if x is not None:
-                all_labels[i] = all_label_names[x]
-        labels[lab] = all_labels
-
-        current_markers = {}
-        for x in all_label_names:
-            current_inner = {}
-            for x2 in all_label_names:
-                current_inner[x2] = []
-            current_markers[x] = current_inner
-
-        with gzip.open(all_paths["markers_" + lab], "rt") as handle:
-            for line in handle:
-                fields = line.strip().split("\t")
-                first = all_label_names[int(fields[0])]
-                second = all_label_names[int(fields[1])]
-                current_markers[first][second] = [int(j) for j in fields[2:]]
-
-        markers[lab] = current_markers
-
-    # Reading in genes.
-    gene_ids = {}
-    for g in gene_types:
-        with gzip.open(all_paths["genes_" + g], "rt") as handle:
-            current_genes = []
-            for line in handle:
-                y = line.strip()
-                if multiple_ids:
-                    if y == "":
-                        y = []
-                    else:
-                        y = y.split("\t")
-                else:
-                    if y == "":
-                        y = None
-                    else:
-                        tx = y.find("\t")
-                        if tx != -1:
-                            y = y[:tx]
-                current_genes.append(y)
-            gene_ids[g] = current_genes
-
-    row_data = biocframe.BiocFrame(gene_ids)
-    col_data = biocframe.BiocFrame(labels)
-
-    # Reading in the matrix first.
-    mat = numpy.ndarray(
-        (row_data.shape[0], col_data.shape[0]), dtype=numpy.int32, order="F"
-    )
-    with gzip.open(all_paths["matrix"], "rt") as handle:
-        sample = 0
-        for line in handle:
-            contents = line.strip().split(",")
-            for i, x in enumerate(contents):
-                contents[i] = int(x)
-            mat[:, sample] = contents
-            sample += 1
-
-    return summarizedexperiment.SummarizedExperiment(
-        {"ranks": mat}, row_data=row_data, column_data=col_data, metadata=markers
-    )
-
-
-def realize_github_markers(
-    markers: dict[Any, dict[Any, Sequence]],
-    features: Sequence,
-    num_markers: Optional[int] = None,
-    restrict_to: Optional[Union[set, dict]] = None,
-) -> dict[Any, dict[Any, Sequence]]:
-    """Convert marker indices from a GitHub reference dataset into feature identifiers.  This allows the markers to be
-    used in :py:meth:`~singler.build_single_reference.build_single_reference`.
-
-    Args:
-        markers:
-            Upregulated markers for each pairwise comparison between labels.
-            Specifically, ``markers[a][b]`` should be a sequence of features
-            that are upregulated in ``a`` compared to ``b``. Features are
-            represented as indices into ``features``.
-
-        features:
-            Sequence of identifiers for each feature. Features with no valid
-            identifier for a particular gene type (e.g., no known symbol)
-            should be represented by None.
-
-        num_markers:
-            Number of markers to retain. If None, all markers are retained.
-
-        restrict_to:
-            Subset of available features to restrict the marker selection.
-            Only features in ``restrict_to`` will be reported in the output.
-            If None, no restriction is performed.
-
-    Returns:
-        A dictionary with the same structure
-        as ``markers``, where each inner sequence contains the corresponding
-        feature identifiers in ``features``. Feature identifiers are guaranteed
-        to be non-None and to be in ``restrict_to`` (if specified). Each
-        inner sequence should have length ``num_markers`` (or less, if not
-        enough non-None/restricted identifiers are available).
-    """
-    output = {}
-    for k, v in markers.items():
-        current = {}
-
-        for k2, v2 in v.items():
-            renamed = []
-
-            for i in v2:
-                if num_markers is not None and len(renamed) == num_markers:
-                    break
-                feat = features[i]
-                if feat is not None:
-                    if restrict_to is None or feat in restrict_to:
-                        renamed.append(feat)
-
-            current[k2] = renamed
-        output[k] = current
-
-    return output
diff --git a/src/singler/get_classic_markers.py b/src/singler/get_classic_markers.py
index 2ed9fa2..14fb675 100644
--- a/src/singler/get_classic_markers.py
+++ b/src/singler/get_classic_markers.py
@@ -111,8 +111,10 @@ def get_classic_markers(
         ref_data:
             A matrix-like object containing the log-normalized expression values of a reference dataset.
             Each column is a sample and each row is a feature.
+            
             Alternatively, this can be a :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`
             containing a matrix-like object in one of its assays.
+            
             Alternatively, a list of such matrices or ``SummarizedExperiment`` objects,
             typically for multiple batches of the same reference;
             it is assumed that different batches exhibit at least some overlap in their ``features`` and ``labels``.
diff --git a/tests/test_annotate_single.py b/tests/test_annotate_single.py
index 540a43f..ff123a3 100644
--- a/tests/test_annotate_single.py
+++ b/tests/test_annotate_single.py
@@ -59,42 +59,3 @@ def test_annotate_single_intersect():
         output.column("scores").column("B") == expected.column("scores").column("B")
     ).all()
 
-
-def test_annotate_single_github():
-    se = singler.fetch_github_reference("ImmGen", cache_dir="_cache")
-
-    keep = range(5, se.shape[0], 2)
-    test = numpy.random.rand(len(keep), 50)
-    ref_features = se.row_data.column("symbol")
-    test_features = [ref_features[i] for i in keep]
-
-    output = singler.annotate_single(
-        test,
-        test_features=test_features,
-        ref_data="ImmGen",
-        ref_features="symbol",
-        ref_labels="main",
-        cache_dir="_cache",
-    )
-    assert output.shape[0] == 50
-
-    expected_markers = singler.realize_github_markers(
-        se.metadata["main"],
-        se.row_data.column("symbol"),
-        restrict_to=set(test_features),
-    )
-    assert output.metadata["markers"] == expected_markers
-
-    # Checking that we handle the number of markers correctly.
-    more_output = singler.annotate_single(
-        test,
-        test_features=test_features,
-        ref_data="ImmGen",
-        ref_features="symbol",
-        ref_labels="main",
-        build_args={"marker_args": {"num_de": 10}},
-        cache_dir="_cache",
-    )
-
-    ref_labels = list(set(se.column_data.column("main")))
-    assert len(more_output.metadata["markers"][ref_labels[0]][ref_labels[1]]) == 10
diff --git a/tests/test_fetch_reference.py b/tests/test_fetch_reference.py
deleted file mode 100644
index 7f241fb..0000000
--- a/tests/test_fetch_reference.py
+++ /dev/null
@@ -1,89 +0,0 @@
-import singler
-import summarizedexperiment
-import re
-import numpy
-
-
-def test_fetch_github_reference():
-    out = singler.fetch_github_reference("ImmGen", cache_dir="_cache")
-    assert isinstance(out, summarizedexperiment.SummarizedExperiment)
-
-    # Checking the genes.
-    assert out.row_data.column("ensembl")[0].startswith("ENS")
-    assert re.match("^[0-9]+", out.row_data.column("entrez")[0]) is not None
-    assert re.match("^[A-Z][a-z]+[0-9]*", out.row_data.column("symbol")[0]) is not None
-
-    ens = out.row_data.column("ensembl")
-    has_none = False
-    for x in ens:
-        if x is None:
-            has_none = True
-            break
-    assert has_none
-
-    has_tab = False
-    for x in ens:
-        if x is not None and x.find("\t") != -1:
-            has_tab = True
-            break
-    assert not has_tab
-
-    # Checking the labels.
-    assert isinstance(out.col_data.column("fine")[0], str)
-    assert isinstance(out.col_data.column("main")[0], str)
-    assert isinstance(out.col_data.column("ont")[0], str)
-
-    # Checking the assay.
-    ass = out.assays["ranks"]
-    assert ass.shape[0] > ass.shape[1]
-    assert (ass.min(0) == numpy.ones(ass.shape[1])).all()
-
-    # Checking markers.
-    markers = out.metadata["fine"]
-    flabs = out.col_data.column("fine")
-    all_labels = sorted(list(set(flabs)))
-    assert sorted(markers.keys()) == all_labels
-    assert sorted(markers[all_labels[0]].keys()) == all_labels
-    assert len(markers[all_labels[0]][all_labels[0]]) == 0
-    assert len(markers[all_labels[0]][all_labels[1]]) > 0
-
-
-def test_fetch_github_reference_multiple():
-    out = singler.fetch_github_reference(
-        "ImmGen", cache_dir="_cache", multiple_ids=True
-    )
-
-    ens = out.row_data.column("ensembl")
-    all_lengths = set()
-    for x in ens:
-        all_lengths.add(len(x))
-    assert 0 in all_lengths
-    assert 1 in all_lengths
-    assert 2 in all_lengths
-
-
-def test_realize_github_markers():
-    markers = {"A": {"B": [1, 3, 5, 7]}}
-    out = singler.realize_github_markers(
-        markers, ["A", "B", "C", "D", "E", "F", "G", "H"]
-    )
-    assert out["A"]["B"] == ["B", "D", "F", "H"]
-
-    # Behaves with the number of markers set.
-    out = singler.realize_github_markers(
-        markers, ["A", "B", "C", "D", "E", "F", "G", "H"], num_markers=2
-    )
-    assert out["A"]["B"] == ["B", "D"]
-
-    out = singler.realize_github_markers(
-        markers, ["A", "B", "C", None, "E", "F", "G", "H"], num_markers=2
-    )
-    assert out["A"]["B"] == ["B", "F"]
-
-    # Behaves with the restrict_to set.
-    out = singler.realize_github_markers(
-        markers,
-        ["A", "B", "C", "D", "E", "F", "G", "H"],
-        restrict_to=set(["E", "F", "G", "H"]),
-    )
-    assert out["A"]["B"] == ["F", "H"]
diff --git a/tests/test_integrated_with_celldex.py b/tests/test_integrated_with_celldex.py
new file mode 100644
index 0000000..8181d66
--- /dev/null
+++ b/tests/test_integrated_with_celldex.py
@@ -0,0 +1,82 @@
+"""Tests for ``singler.annotate_integrated`` with references fetched via celldex."""
+
+import singler
+import numpy
+import celldex
+import scrnaseq
+import pandas as pd
+import scipy
+import pytest
+from biocframe import BiocFrame
+
+
+def _load_test_and_references():
+    """Fetch the test dataset and the two references shared by every test.
+
+    Returns:
+        Tuple of the Zeisel brain test dataset, the Blueprint/ENCODE
+        reference and the DICE reference, all fetched with ``realize_assays=True``.
+    """
+    sce = scrnaseq.fetch_dataset("zeisel-brain-2015", "2023-12-14", realize_assays=True)
+    blueprint_ref = celldex.fetch_reference(
+        "blueprint_encode", "2024-02-26", realize_assays=True
+    )
+    immune_cell_ref = celldex.fetch_reference("dice", "2024-02-26", realize_assays=True)
+    return sce, blueprint_ref, immune_cell_ref
+
+
+def test_with_minimal_args():
+    """Labels are given by column name; a bare test assay must be rejected."""
+    sce, blueprint_ref, immune_cell_ref = _load_test_and_references()
+
+    # NOTE(review): presumably this fails because a bare assay carries no
+    # feature names and ``test_features`` is not supplied -- confirm the cause.
+    with pytest.raises(Exception):
+        singler.annotate_integrated(
+            test_data=sce.assays["counts"],
+            ref_data_list=(blueprint_ref, immune_cell_ref),
+            ref_labels_list="label.main",
+            num_threads=6,
+        )
+
+    single, integrated = singler.annotate_integrated(
+        test_data=sce,
+        ref_data_list=(blueprint_ref, immune_cell_ref),
+        ref_labels_list="label.main",
+        num_threads=6,
+    )
+    assert len(single) == 2
+    assert isinstance(integrated, BiocFrame)
+
+
+def test_with_all_supplied():
+    """Features and labels are passed explicitly for the test and each reference."""
+    sce, blueprint_ref, immune_cell_ref = _load_test_and_references()
+    references = (blueprint_ref, immune_cell_ref)
+
+    single, integrated = singler.annotate_integrated(
+        test_data=sce,
+        test_features=sce.get_row_names(),
+        ref_data_list=references,
+        ref_labels_list=[
+            x.get_column_data().column("label.main") for x in references
+        ],
+        ref_features_list=[x.get_row_names() for x in references],
+    )
+
+    assert len(single) == 2
+    assert isinstance(integrated, BiocFrame)
+
+
+def test_with_colname():
+    """Labels are given by column name, without overriding ``num_threads``."""
+    sce, blueprint_ref, immune_cell_ref = _load_test_and_references()
+
+    single, integrated = singler.annotate_integrated(
+        test_data=sce,
+        ref_data_list=(blueprint_ref, immune_cell_ref),
+        ref_labels_list="label.main",
+    )
+
+    assert len(single) == 2
+    assert isinstance(integrated, BiocFrame)
diff --git a/tests/test_single_with_celldex.py b/tests/test_single_with_celldex.py
new file mode 100644
index 0000000..b5cd6db
--- /dev/null
+++ b/tests/test_single_with_celldex.py
@@ -0,0 +1,78 @@
+"""Tests for ``singler.annotate_single`` with a reference fetched via celldex."""
+
+import singler
+import numpy
+import celldex
+import scrnaseq
+import pandas as pd
+import scipy
+import pytest
+from biocframe import BiocFrame
+
+
+def _load_test_and_reference():
+    """Fetch the test dataset and the reference shared by every test.
+
+    Returns:
+        Tuple of the Zeisel brain test dataset and the ImmGen reference,
+        both fetched with ``realize_assays=True``.
+    """
+    sce = scrnaseq.fetch_dataset("zeisel-brain-2015", "2023-12-14", realize_assays=True)
+    immgen_ref = celldex.fetch_reference("immgen", "2024-02-26", realize_assays=True)
+    return sce, immgen_ref
+
+
+def _check_matches(matches):
+    """Check that ``matches`` is a ``BiocFrame`` with a countable ``best`` column."""
+    assert isinstance(matches, BiocFrame)
+
+    counts = pd.Series(matches["best"]).value_counts()
+    assert counts is not None
+
+
+def test_with_minimal_args():
+    """Labels are passed as a column; a bare test assay must be rejected."""
+    sce, immgen_ref = _load_test_and_reference()
+    ref_labels = immgen_ref.get_column_data().column("label.main")
+
+    # NOTE(review): presumably this fails because a bare assay carries no
+    # feature names and ``test_features`` is not supplied -- confirm the cause.
+    with pytest.raises(Exception):
+        singler.annotate_single(
+            test_data=sce.assays["counts"],
+            ref_data=immgen_ref,
+            ref_labels=ref_labels,
+        )
+
+    matches = singler.annotate_single(
+        test_data=sce,
+        ref_data=immgen_ref,
+        ref_labels=ref_labels,
+    )
+    _check_matches(matches)
+
+
+def test_with_all_supplied():
+    """Features and labels are passed explicitly for both test and reference."""
+    sce, immgen_ref = _load_test_and_reference()
+
+    matches = singler.annotate_single(
+        test_data=sce,
+        test_features=sce.get_row_names(),
+        ref_data=immgen_ref,
+        ref_labels=immgen_ref.get_column_data().column("label.main"),
+        ref_features=immgen_ref.get_row_names(),
+    )
+    _check_matches(matches)
+
+
+def test_with_colname():
+    """Labels are given as the name of a column in the reference."""
+    sce, immgen_ref = _load_test_and_reference()
+
+    matches = singler.annotate_single(
+        test_data=sce,
+        ref_data=immgen_ref,
+        ref_labels="label.main",
+    )
+    _check_matches(matches)