diff --git a/README.md b/README.md
index 21a6a93..cee48d7 100644
--- a/README.md
+++ b/README.md
@@ -116,6 +116,18 @@ output = singler.classify_single_reference(
 )
 ```
 
+    ## output
+    BiocFrame with 4340 rows and 3 columns
+                best                                   scores                delta
+            <list>                              <BiocFrame>   <ndarray[float64]>
+    [0] Monocytes 0.33265560369962943:0.407117403330602...  0.40706830113982534
+    [1] Monocytes 0.4078771641637374:0.4783396310685646...  0.07000418564184802
+    [2] Monocytes 0.3517036021728629:0.4076971245524348...  0.30997293412307647
+                ...                                      ...                  ...
+    [4337]  NK cells 0.3472631136865701:0.3937898240670208...  0.09640242155786138
+    [4338]   B-cells 0.26974632191999887:0.334862058137758... 0.061215905058676856
+    [4339] Monocytes 0.39390119034537324:0.468867490667427...  0.06678168346812047
+
 ## Integrating labels across references
 
 We can use annotations from multiple references through the `annotate_integrated()` function:
@@ -125,9 +137,9 @@ import singler
 single_results, integrated = singler.annotate_integrated(
     mat,
     features,
-    ref_data = ("BlueprintEncode", "DatabaseImmuneCellExpression"),
-    ref_features = "symbol",
-    ref_labels = "main",
+    ref_data_list = ("BlueprintEncode", "DatabaseImmuneCellExpression"),
+    ref_features_list = "symbol",
+    ref_labels_list = "main",
     build_integrated_args = { "ref_names": ("Blueprint", "DICE") },
     cache_dir = "_cache",
     num_threads = 6
diff --git a/setup.cfg b/setup.cfg
index ca8489a..59d2073 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -5,17 +5,17 @@
 
 [metadata]
 name = singler
-description = Add a short description here!
+description = Python bindings to the SingleR algorithm for annotating cell types from known references.
 author = Aaron Lun
 author_email = lun.aaron@gene.com
 license = MIT
 license_files = LICENSE.txt
 long_description = file: README.md
 long_description_content_type = text/markdown; charset=UTF-8; variant=GFM
-url = https://github.com/pyscaffold/pyscaffold/
+url = https://github.com/BiocPy/singler
 # Add here related links, for example:
 project_urls =
-    Documentation = https://pyscaffold.org/
+    Documentation = https://github.com/BiocPy/singler
 #    Source = https://github.com/pyscaffold/pyscaffold/
 #    Changelog = https://pyscaffold.org/en/latest/changelog.html
 #    Tracker = https://github.com/pyscaffold/pyscaffold/issues
@@ -41,7 +41,7 @@ package_dir =
     =src
 
 # Require a min/specific Python version (comma-separated conditions)
-# python_requires = >=3.8
+python_requires = >=3.8
 
 # Add here dependencies of your project (line-separated), e.g. requests>=2.2,<3.0.
 # Version specifiers like >=2.2,<3.0 avoid problems due to API changes in
@@ -50,10 +50,11 @@ package_dir =
 install_requires =
     importlib-metadata; python_version<"3.8"
     mattress>=0.1.4
-    assorthead
+    assorthead>=0.0.11
     delayedarray
-    biocframe
-    summarizedexperiment
+    biocframe>=0.5.0
+    summarizedexperiment>=0.4.0
+    biocutils
 
 [options.packages.find]
 where = src
diff --git a/src/singler/_Markers.py b/src/singler/_Markers.py
index 1c7bde5..9259f7d 100644
--- a/src/singler/_Markers.py
+++ b/src/singler/_Markers.py
@@ -1,6 +1,8 @@
+from typing import Any, Sequence
+
+from numpy import array, int32, ndarray
+
 from . import _cpphelpers as lib
-from numpy import ndarray, int32, array
-from typing import Sequence, Any
 
 
 class _Markers:
diff --git a/src/singler/_utils.py b/src/singler/_utils.py
index 2306a35..e6a4ed6 100644
--- a/src/singler/_utils.py
+++ b/src/singler/_utils.py
@@ -1,37 +1,15 @@
-from numpy import ndarray
 from typing import Sequence, Tuple
-from summarizedexperiment import SummarizedExperiment
-from mattress import tatamize, TatamiNumericPointer
-from delayedarray import DelayedArray
-
-
-def _factorize(x: Sequence) -> Tuple[Sequence, ndarray]:
-    levels = []
-    mapping = {}
-    indices = []
-
-    for i, lev in enumerate(x):
-        if lev is None:
-            indices.append(None)
-        else:
-            if lev not in mapping:
-                mapping[lev] = len(levels)
-                levels.append(lev)
-            indices.append(mapping[lev])
-
-    return levels, indices
 
+import biocutils as ut
+import numpy as np
+from delayedarray import DelayedArray
+from mattress import TatamiNumericPointer, tatamize
+from summarizedexperiment import SummarizedExperiment
 
-def _match(x: Sequence, levels: Sequence) -> ndarray:
-    mapping = _create_map(levels)
-    indices = []
-    for i, y in enumerate(x):
-        if y is None or y not in mapping:
-            indices.append(None)
-        else:
-            indices.append(mapping[y])
 
-    return indices
+def _factorize(x: Sequence) -> Tuple[list, np.ndarray]:  # returns (levels, integer codes) for x
+    _factor = ut.Factor.from_sequence(x, sort_levels=False)  # sort_levels=False: levels are not sorted
+    return _factor.levels, np.array(_factor.codes, np.int32)  # codes are coerced to an int32 array
 
 
 def _create_map(x: Sequence) -> dict:
@@ -92,7 +70,7 @@ def _clean_matrix(x, features, assay_type, check_missing, num_threads):
     if isinstance(x, TatamiNumericPointer):
         # Assume the pointer was previously generated from _clean_matrix,
         # so it's 2-dimensional, matches up with features and it's already
-        # clean of NaNs... so we no-op and just return it directly. 
+        # clean of NaNs... so we no-op and just return it directly.
         return x, features
 
     if isinstance(x, SummarizedExperiment):
diff --git a/src/singler/annotate_integrated.py b/src/singler/annotate_integrated.py
index 411f9e6..fad6f42 100644
--- a/src/singler/annotate_integrated.py
+++ b/src/singler/annotate_integrated.py
@@ -1,13 +1,13 @@
-from typing import Union, Sequence, Optional, Any, Tuple
+from typing import Any, Optional, Sequence, Tuple, Union
+
 from biocframe import BiocFrame
 
-from .fetch_reference import fetch_github_reference, realize_github_markers
-from .build_single_reference import build_single_reference
-from .classify_single_reference import classify_single_reference
+from ._utils import _clean_matrix
+from .annotate_single import _attach_markers, _resolve_reference
 from .build_integrated_references import build_integrated_references
+from .build_single_reference import build_single_reference
 from .classify_integrated_references import classify_integrated_references
-from .annotate_single import _resolve_reference, _attach_markers
-from ._utils import _clean_matrix
+from .classify_single_reference import classify_single_reference
 
 
 def annotate_integrated(
@@ -27,20 +27,22 @@ def annotate_integrated(
     classify_integrated_args: dict = {},
     num_threads: int = 1,
 ) -> Tuple[list[BiocFrame], BiocFrame]:
-    """Annotate a single-cell expression dataset based on the correlation 
+    """Annotate a single-cell expression dataset based on the correlation
     of each cell to profiles in multiple labelled references, where the
     annotation from each reference is then integrated across references.
 
     Args:
-        test_data: A matrix-like object representing the test dataset, where rows are
+        test_data:
+            A matrix-like object representing the test dataset, where rows are
             features and columns are samples (usually cells). Entries should be expression
             values; only the ranking within each column will be used.
 
             Alternatively, a
             :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`
-            containing such a matrix in one of its assays. 
+            containing such a matrix in one of its assays.
 
-        test_features: Sequence of length equal to the number of rows in
+        test_features:
+            Sequence of length equal to the number of rows in
             ``test_data``, containing the feature identifier for each row.
 
         ref_data_list:
@@ -50,7 +52,7 @@ def annotate_integrated(
               are features and columns are samples. Entries should be expression values,
               usually log-transformed (see comments for the ``ref`` argument in
               :py:meth:`~singler.build_single_reference.build_single_reference`).
-            - A 
+            - A
               :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`
               object containing such a matrix in its assays.
             - A string that can be passed as ``name`` to
@@ -129,18 +131,22 @@ def annotate_integrated(
     if isinstance(ref_labels_list, str):
         ref_labels_list = [ref_labels_list] * nrefs
     elif nrefs != len(ref_labels_list):
-        raise ValueError("'ref_data_list' and 'ref_labels_list' must be the same length")
+        raise ValueError(
+            "'ref_data_list' and 'ref_labels_list' must be the same length"
+        )
     if isinstance(ref_features_list, str):
         ref_features_list = [ref_features_list] * nrefs
     elif nrefs != len(ref_features_list):
-        raise ValueError("'ref_data_list' and 'ref_features_list' must be the same length")
+        raise ValueError(
+            "'ref_data_list' and 'ref_features_list' must be the same length"
+        )
 
     test_ptr, test_features = _clean_matrix(
         test_data,
         test_features,
-        assay_type = test_assay_type,
-        check_missing = test_check_missing,
-        num_threads = num_threads,
+        assay_type=test_assay_type,
+        check_missing=test_check_missing,
+        num_threads=num_threads,
     )
 
     all_ref_data = []
@@ -163,9 +169,9 @@ def annotate_integrated(
         curref_ptr, curref_features = _clean_matrix(
             curref_mat,
             curref_features,
-            assay_type = ref_assay_type,
-            check_missing = ref_check_missing,
-            num_threads = num_threads,
+            assay_type=ref_assay_type,
+            check_missing=ref_check_missing,
+            num_threads=num_threads,
         )
 
         bargs = _attach_markers(curref_markers, build_single_args)
diff --git a/src/singler/annotate_single.py b/src/singler/annotate_single.py
index 01b9cc1..03be964 100644
--- a/src/singler/annotate_single.py
+++ b/src/singler/annotate_single.py
@@ -1,14 +1,16 @@
-from typing import Union, Sequence, Optional, Any
-from biocframe import BiocFrame
 from copy import copy
+from typing import Any, Optional, Sequence, Union
+
+from biocframe import BiocFrame
 
-from .fetch_reference import fetch_github_reference, realize_github_markers
 from .build_single_reference import build_single_reference
 from .classify_single_reference import classify_single_reference
-from ._utils import _clean_matrix
+from .fetch_reference import fetch_github_reference, realize_github_markers
 
 
-def _resolve_reference(ref_data, ref_labels, ref_features, cache_dir, build_args, test_features_set):
+def _resolve_reference(
+    ref_data, ref_labels, ref_features, cache_dir, build_args, test_features_set
+):
     if isinstance(ref_data, str):
         ref = fetch_github_reference(ref_data, cache_dir=cache_dir)
         ref_features = ref.row_data.column(ref_features)
@@ -27,7 +29,7 @@ def _resolve_reference(ref_data, ref_labels, ref_features, cache_dir, build_args
         )
 
         ref_data = ref.assay("ranks")
-        ref_labels=ref.col_data.column(ref_labels)
+        ref_labels = ref.col_data.column(ref_labels)
     else:
         ref_markers = None
 
@@ -54,11 +56,12 @@ def annotate_single(
     classify_args: dict = {},
     num_threads: int = 1,
 ) -> BiocFrame:
-    """Annotate a single-cell expression dataset based on the correlation 
+    """Annotate a single-cell expression dataset based on the correlation
     of each cell to profiles in a labelled reference.
 
     Args:
-        test_data: A matrix-like object representing the test dataset, where rows are
+        test_data:
+            A matrix-like object representing the test dataset, where rows are
             features and columns are samples (usually cells). Entries should be expression
             values; only the ranking within each column will be used.
 
@@ -67,10 +70,12 @@ def annotate_single(
             containing such a matrix in one of its assays. Non-default assay
             types can be specified in ``classify_args``.
 
-        test_features: Sequence of length equal to the number of rows in
+        test_features:
+            Sequence of length equal to the number of rows in
             ``test_data``, containing the feature identifier for each row.
 
-        ref_data: A matrix-like object representing the reference dataset, where rows
+        ref_data:
+            A matrix-like object representing the reference dataset, where rows
             are features and columns are samples. Entries should be expression values,
             usually log-transformed (see comments for the ``ref`` argument in
             :py:meth:`~singler.build_single_reference.build_single_reference`).
diff --git a/src/singler/build_integrated_references.py b/src/singler/build_integrated_references.py
index e9575c2..19cb2d7 100644
--- a/src/singler/build_integrated_references.py
+++ b/src/singler/build_integrated_references.py
@@ -1,10 +1,11 @@
 from typing import Sequence, Optional, Union
 from numpy import array, ndarray, int32, uintp
-from mattress import tatamize
+
+import biocutils as ut
 
 from .build_single_reference import SinglePrebuiltReference
 from . import _cpphelpers as lib
-from ._utils import _stable_union, _factorize, _match, _clean_matrix
+from ._utils import _stable_union, _factorize, _clean_matrix
 
 
 class IntegratedReferences:
@@ -30,7 +31,7 @@ def reference_names(self) -> Union[Sequence[str], None]:
     def reference_labels(self) -> list:
         """List of lists containing the names of the labels for each reference.
 
-        Each entry corresponds to a reference in :py:attr:`~reference_names`, 
+        Each entry corresponds to a reference in :py:attr:`~reference_names`,
         if ``reference_names`` is not None.
         """
         return self._labels
@@ -55,25 +56,31 @@ def build_integrated_references(
     """Build a set of integrated references for classification of a test dataset.
 
     Arguments:
-        test_features: Sequence of features for the test dataset.
+        test_features:
+            Sequence of features for the test dataset.
 
-        ref_data_list: List of reference datasets, where each entry is equivalent to ``ref_data`` in
+        ref_data_list:
+            List of reference datasets, where each entry is equivalent to ``ref_data`` in
             :py:meth:`~singler.build_single_reference.build_single_reference`.
 
-        ref_labels_list: List of reference labels, where each entry is equivalent to ``ref_labels`` in
+        ref_labels_list:
+            List of reference labels, where each entry is equivalent to ``ref_labels`` in
             :py:meth:`~singler.build_single_reference.build_single_reference`.
 
-        ref_features_list: List of reference features, where each entry is equivalent to ``ref_features`` in
+        ref_features_list:
+            List of reference features, where each entry is equivalent to ``ref_features`` in
             :py:meth:`~singler.build_single_reference.build_single_reference`.
 
-        ref_prebuilt_list: List of prebuilt references, typically created by 
+        ref_prebuilt_list:
+            List of prebuilt references, typically created by
             calling :py:meth:`~singler.build_single_reference.build_single_reference` on the corresponding
             elements of ``ref_data_list``, ``ref_labels_list`` and ``ref_features_list``.
 
-        ref_names: Sequence of names for the references.
+        ref_names:
+            Sequence of names for the references.
             If None, these are automatically generated.
 
-        assay_type:
+        assay_type:
             Assay containing the expression matrix for any entry of ``ref_data_list`` that is a
             :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`.
 
@@ -96,7 +103,7 @@ def build_integrated_references(
 
     universe = _stable_union(test_features, *ref_features_list)
     original_test_features = test_features
-    test_features = array(_match(test_features, universe), dtype=int32)
+    test_features = array(ut.match(test_features, universe), dtype=int32)
 
     converted_ref_data = []
     ref_data_ptrs = ndarray(nrefs, dtype=uintp)
@@ -107,14 +114,14 @@ def build_integrated_references(
         curptr, curfeatures = _clean_matrix(
             x,
             ref_features_list[i],
-            assay_type = assay_type,
-            check_missing = check_missing,
-            num_threads = num_threads,
+            assay_type=assay_type,
+            check_missing=check_missing,
+            num_threads=num_threads,
         )
         converted_ref_data.append(curptr)
         ref_data_ptrs[i] = curptr.ptr
 
-        ind = array(_match(curfeatures, universe), dtype=int32)
+        ind = array(ut.match(curfeatures, universe), dtype=int32)
         converted_feature_data.append(ind)
         ref_features_ptrs[i] = ind.ctypes.data
 
@@ -142,7 +149,9 @@ def build_integrated_references(
 
     if ref_names is not None:
         if nrefs != len(ref_names):
-            raise ValueError("'ref_names' and 'ref_data_list' should have the same length")
+            raise ValueError(
+                "'ref_names' and 'ref_data_list' should have the same length"
+            )
         elif nrefs != len(set(ref_names)):
             raise ValueError("'ref_names' should contain unique names")
 
diff --git a/src/singler/build_single_reference.py b/src/singler/build_single_reference.py
index dabe4cd..5761a65 100644
--- a/src/singler/build_single_reference.py
+++ b/src/singler/build_single_reference.py
@@ -1,9 +1,11 @@
-from numpy import int32, array, ndarray
-from typing import Sequence, Union, Any, Optional, Literal
+from typing import Any, Literal, Optional, Sequence, Union
+
+import biocutils as ut
+from numpy import array, int32, ndarray
 
-from ._Markers import _Markers
 from . import _cpphelpers as lib
-from ._utils import _factorize, _match, _clean_matrix, _restrict_features
+from ._Markers import _Markers
+from ._utils import _clean_matrix, _factorize, _restrict_features
 from .get_classic_markers import _get_classic_markers_raw
 
 
@@ -31,7 +33,7 @@ def __del__(self):
     def num_markers(self) -> int:
         """
         Returns:
-            int: Number of markers to be used for classification. This is the
+            Number of markers to be used for classification. This is the
             same as the size of the array from :py:meth:`~marker_subset`.
         """
         return lib.get_nsubset_from_single_reference(self._ptr)
@@ -39,7 +41,7 @@ def num_markers(self) -> int:
     def num_labels(self) -> int:
         """
         Returns:
-            int: Number of unique labels in this reference.
+            Number of unique labels in this reference.
         """
         return lib.get_nlabels_from_single_reference(self._ptr)
 
@@ -70,13 +72,14 @@ def markers(self) -> dict[Any, dict[Any, Sequence]]:
     def marker_subset(self, indices_only: bool = False) -> Union[ndarray, list]:
         """
         Args:
-            indices_only: Whether to return the markers as indices
+            indices_only:
+                Whether to return the markers as indices
                 into :py:attr:`~features`, or as a list of feature identifiers.
 
         Returns:
             If ``indices_only = False``, a list of feature identifiers for the markers.
 
-            If ``indices_only = True``, a NumPy array containing the integer indices of 
+            If ``indices_only = True``, a NumPy array containing the integer indices of
             features in ``features`` that were chosen as markers.
         """
         nmarkers = self.num_markers()
@@ -104,7 +107,8 @@ def build_single_reference(
     """Build a single reference dataset in preparation for classification.
 
     Args:
-        ref_data: A matrix-like object where rows are features, columns are
+        ref_data:
+            A matrix-like object where rows are features, columns are
             reference profiles, and each entry is the expression value.
             If `markers` is not provided, expression should be normalized
             and log-transformed in preparation for marker prioritization via
@@ -115,13 +119,16 @@ def build_single_reference(
             :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`
             containing such a matrix in one of its assays.
 
-        labels: Sequence of labels for each reference profile,
+        labels:
+            Sequence of labels for each reference profile,
             i.e., column in ``ref``.
 
-        features: Sequence of identifiers for each feature,
+        features:
+            Sequence of identifiers for each feature,
             i.e., row in ``ref``.
 
-        assay_type: Assay containing the expression matrix,
+        assay_type:
+            Assay containing the expression matrix,
             if `ref_data` is a
             :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`.
 
@@ -159,7 +166,7 @@ def build_single_reference(
             Number of threads to use for reference building.
 
     Returns:
-        The pre-built reference, ready for use in downstream methods like 
+        The pre-built reference, ready for use in downstream methods like
         :py:meth:`~singler.classify_single_reference.classify_single_reference`.
     """
 
@@ -183,7 +190,7 @@ def build_single_reference(
                 **marker_args,
             )
             markers = mrk.to_dict(lablev, ref_features)
-            labind = array(_match(ref_labels, lablev), dtype=int32)
+            labind = array(ut.match(ref_labels, lablev), dtype=int32)
         else:
             raise NotImplementedError("other marker methods are not implemented, sorry")
     else:
diff --git a/src/singler/classify_integrated_references.py b/src/singler/classify_integrated_references.py
index 756c97c..79f1845 100644
--- a/src/singler/classify_integrated_references.py
+++ b/src/singler/classify_integrated_references.py
@@ -1,12 +1,13 @@
-from typing import Sequence, Union, Any
-from numpy import array, ndarray, int32, float64, uintp
-from mattress import tatamize, TatamiNumericPointer
+from typing import Any, Sequence, Union
+
+import biocutils as ut
 from biocframe import BiocFrame
+from mattress import TatamiNumericPointer, tatamize
+from numpy import array, float64, int32, ndarray, uintp
 from summarizedexperiment import SummarizedExperiment
 
-from .build_integrated_references import IntegratedReferences
 from . import _cpphelpers as lib
-from ._utils import _match
+from .build_integrated_references import IntegratedReferences
 
 
 def classify_integrated_references(
@@ -20,7 +21,8 @@ def classify_integrated_references(
     """Integrate classification results across multiple references for a single test dataset.
 
     Args:
-        test_data: A matrix-like object where each row is a feature and each column
+        test_data:
+            A matrix-like object where each row is a feature and each column
             is a test sample (usually a single cell), containing expression values.
             Normalized and/or transformed expression values are also acceptable as only
             the ranking is used within this function.
@@ -103,7 +105,7 @@ def classify_integrated_references(
                 "each entry of 'results' should have results for all cells in 'test_data'"
             )
 
-        ind = array(_match(curlabs, all_labels[i]), dtype=int32)
+        ind = array(ut.match(curlabs, all_labels[i]), dtype=int32)
         coerced_labels.append(ind)
         assign_ptrs[i] = ind.ctypes.data
 
diff --git a/src/singler/classify_single_reference.py b/src/singler/classify_single_reference.py
index d33eac8..4643e44 100644
--- a/src/singler/classify_single_reference.py
+++ b/src/singler/classify_single_reference.py
@@ -1,11 +1,11 @@
-from mattress import tatamize
-from numpy import ndarray, int32, float64, uintp
+from typing import Any, Sequence, Union
+
 from biocframe import BiocFrame
-from typing import Sequence, Any, Union
+from numpy import float64, int32, ndarray, uintp
 
-from .build_single_reference import SinglePrebuiltReference
 from . import _cpphelpers as lib
-from ._utils import _create_map, _clean_matrix
+from ._utils import _clean_matrix, _create_map
+from .build_single_reference import SinglePrebuiltReference
 
 
 def classify_single_reference(
@@ -23,7 +23,8 @@ def classify_single_reference(
     using the SingleR algorithm.
 
     Args:
-        test_data: A matrix-like object where each row is a feature and each column
+        test_data:
+            A matrix-like object where each row is a feature and each column
             is a test sample (usually a single cell), containing expression values.
             Normalized and transformed expression values are also acceptable as only
             the ranking is used within this function.
@@ -32,14 +33,16 @@ def classify_single_reference(
             :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`
             containing such a matrix in one of its assays.
 
-        test_features: Sequence of identifiers for each feature in the test
+        test_features:
+            Sequence of identifiers for each feature in the test
             dataset, i.e., row in ``test_data``.
 
         ref_prebuilt:
             A pre-built reference created with
             :py:meth:`~singler.build_single_reference.build_single_reference`.
 
-        assay_type: Assay containing the expression matrix,
+        assay_type:
+            Assay containing the expression matrix,
             if `test_data` is a
             :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`.
 
diff --git a/src/singler/fetch_reference.py b/src/singler/fetch_reference.py
index 26c2967..7449364 100644
--- a/src/singler/fetch_reference.py
+++ b/src/singler/fetch_reference.py
@@ -1,13 +1,13 @@
-import urllib.request as req
-import urllib.parse
-import summarizedexperiment
-import tempfile
-import os
 import gzip
+import os
+import tempfile
+import urllib.parse
+import urllib.request as req
+from typing import Any, Literal, Optional, Sequence, Union
+
 import biocframe
 import numpy
-from typing import Literal, Any, Sequence, Optional, Union
-
+import summarizedexperiment
 
 SESSION_DIR = None
 
@@ -23,7 +23,7 @@
 
 
 def fetch_github_reference(
-    name: KNOWN_REFERENCE, cache_dir: str = None, multiple_ids: bool = False
+    name: KNOWN_REFERENCE, cache_dir: Optional[str] = None, multiple_ids: bool = False
 ) -> summarizedexperiment.SummarizedExperiment:
     """Fetch a reference dataset from the
     `pre-compiled GitHub registry <https://github.com/kanaverse/singlepp-references>`_,
@@ -181,7 +181,7 @@ def fetch_github_reference(
             sample += 1
 
     return summarizedexperiment.SummarizedExperiment(
-        {"ranks": mat}, row_data=row_data, col_data=col_data, metadata=markers
+        {"ranks": mat}, row_data=row_data, column_data=col_data, metadata=markers
     )
 
 
@@ -190,7 +190,7 @@ def realize_github_markers(
     features: Sequence,
     num_markers: Optional[int] = None,
     restrict_to: Optional[Union[set, dict]] = None,
-) -> dict[Any, dict[Any, Sequence]]: 
+) -> dict[Any, dict[Any, Sequence]]:
     """Convert marker indices from a GitHub reference dataset into feature identifiers.  This allows the markers to be
     used in :py:meth:`~singler.build_single_reference.build_single_reference`.
 
diff --git a/src/singler/get_classic_markers.py b/src/singler/get_classic_markers.py
index a7279fe..2ed9fa2 100644
--- a/src/singler/get_classic_markers.py
+++ b/src/singler/get_classic_markers.py
@@ -1,17 +1,18 @@
-from numpy import ndarray, int32, uintp
-from mattress import tatamize
-from typing import Union, Sequence, Optional, Any
+from typing import Any, Optional, Sequence, Union
+
 import delayedarray
+from mattress import tatamize
+from numpy import int32, ndarray, uintp
 
 from . import _cpphelpers as lib
+from ._Markers import _Markers
 from ._utils import (
     _clean_matrix,
-    _stable_intersect,
-    _stable_union,
     _create_map,
     _restrict_features,
+    _stable_intersect,
+    _stable_union,
 )
-from ._Markers import _Markers
 
 
 def _get_classic_markers_raw(
diff --git a/tests/test_annotate_single.py b/tests/test_annotate_single.py
index be5cc54..540a43f 100644
--- a/tests/test_annotate_single.py
+++ b/tests/test_annotate_single.py
@@ -96,5 +96,5 @@ def test_annotate_single_github():
         cache_dir="_cache",
     )
 
-    ref_labels = list(set(se.col_data.column("main")))
+    ref_labels = list(set(se.column_data.column("main")))
     assert len(more_output.metadata["markers"][ref_labels[0]][ref_labels[1]]) == 10
diff --git a/tests/test_build_integrated_references.py b/tests/test_build_integrated_references.py
index 496d2c1..0ea2f93 100644
--- a/tests/test_build_integrated_references.py
+++ b/tests/test_build_integrated_references.py
@@ -25,7 +25,8 @@ def test_build_integrated_references():
     )
 
     assert integrated.reference_names == None
-    assert integrated.reference_labels == [["A", "B", "C", "D", "E"], ["z", "y", "x"]]
+    assert list(integrated.reference_labels[0]) == ["A", "B", "C", "D", "E"]
+    assert list(integrated.reference_labels[1]) == ["z", "y", "x"]
     assert integrated.test_features == test_features
 
     # Works in parallel.
diff --git a/tests/test_classify_integrated_references.py b/tests/test_classify_integrated_references.py
index ac15146..0663a8b 100644
--- a/tests/test_classify_integrated_references.py
+++ b/tests/test_classify_integrated_references.py
@@ -70,4 +70,4 @@ def test_classify_integrated_references():
 
     assert results.shape[0] == 50
     assert set(results.column("best_reference")) == set([0, 1])
-    assert results.column("scores").column_names == ['0', '1']
+    assert list(results.column("scores").column_names) == ['0', '1']
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 2c62de0..2cbdaf8 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,6 +1,5 @@
 from singler._utils import (
     _factorize,
-    _match,
     _stable_intersect,
     _stable_union,
     _clean_matrix,
@@ -12,36 +11,18 @@
 
 def test_factorize():
     lev, ind = _factorize([1, 3, 5, 5, 3, 1])
-    assert lev == [1, 3, 5]
-    assert ind == [0, 1, 2, 2, 1, 0]
+    assert list(lev) == ["1", "3", "5"]
+    assert (ind == [0, 1, 2, 2, 1, 0]).all()
 
     # Preserves the order.
     lev, ind = _factorize(["C", "D", "A", "B", "C", "A"])
-    assert lev == ["C", "D", "A", "B"]
-    assert ind == [0, 1, 2, 3, 0, 2]
+    assert list(lev) == ["C", "D", "A", "B"]
+    assert (ind == [0, 1, 2, 3, 0, 2]).all()
 
     # Handles None-ness.
     lev, ind = _factorize([1, None, 5, None, 3, None])
-    assert lev == [1, 5, 3]
-    assert ind == [0, None, 1, None, 2, None]
-
-
-def test_match():
-    mm = _match(["A", "C", "B", "D", "A", "A", "C", "D", "B"], ["D", "C", "B", "A"])
-    assert list(mm) == [3, 1, 2, 0, 3, 3, 1, 0, 2]
-
-    # Handles duplicate targets.
-    x = [5, 1, 2, 3, 5, 6, 7, 7, 2, 1]
-    mm = _match(x, [1, 2, 3, 3, 5, 6, 1, 7, 6])
-    assert mm == [4, 0, 1, 2, 4, 5, 7, 7, 1, 0]
-
-    # Handles None-ness.
-    mm = _match(["A", None, "B", "D", None, "A", "C", None, "B"], ["D", "C", "B", "A"])
-    assert list(mm) == [3, None, 2, 0, None, 3, 1, None, 2]
-
-    mm = _match(["A", "B", "D", "A", "C", "B"], ["D", None, "C", "B", None, "A"])
-    assert list(mm) == [5, 3, 0, 5, 2, 3]
-
+    assert list(lev) == ["1", "5", "3"]
+    assert (ind == [0, -1, 1, -1, 2, -1]).all()
 
 def test_intersect():
     # Preserves the order in the first argument.