diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py
index d517bf7b2f..93a8e1ab23 100644
--- a/benchmarks/benchmarks.py
+++ b/benchmarks/benchmarks.py
@@ -33,74 +33,6 @@ def load_sequences():
     return sequences
 
 
-class TimeMinHashSuite:
-    def setup(self):
-        self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False)
-        self.protein_mh = MinHash(
-            MINHASH_NUM, MINHASH_K, is_protein=True, track_abundance=False
-        )
-        self.sequences = load_sequences()
-
-        self.populated_mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False)
-        for seq in self.sequences:
-            self.populated_mh.add_sequence(seq)
-
-    def time_add_sequence(self):
-        mh = self.mh
-        sequences = self.sequences
-        for seq in sequences:
-            mh.add_sequence(seq)
-
-    def time_add_protein(self):
-        mh = self.protein_mh
-        sequences = self.sequences
-        for seq in sequences:
-            mh.add_protein(seq)
-
-    def time_get_mins(self):
-        mh = self.populated_mh
-        for i in range(GET_MINS_RANGE):
-            mh.get_mins()
-
-    def time_add_hash(self):
-        mh = self.mh
-        for i in range(ADD_HASH_RANGE):
-            mh.add_hash(i)
-
-    def time_add_many(self):
-        mh = self.mh
-        mh.add_many(list(range(ADD_MANY_RANGE)))
-
-    def time_similarity(self):
-        mh = self.mh
-        other_mh = self.populated_mh
-        for i in range(SIMILARITY_TIMES):
-            mh.similarity(other_mh)
-
-    def time_count_common(self):
-        mh = self.mh
-        other_mh = self.populated_mh
-        for i in range(COUNT_COMMON_TIMES):
-            mh.count_common(other_mh)
-
-    def time_merge(self):
-        mh = self.mh
-        other_mh = self.populated_mh
-        for i in range(MERGE_TIMES):
-            mh.merge(other_mh)
-
-    def time_copy(self):
-        mh = self.populated_mh
-        for i in range(COPY_TIMES):
-            mh.__copy__()
-
-    def time_concat(self):
-        mh = self.mh
-        other_mh = self.populated_mh
-        for i in range(CONCAT_TIMES):
-            mh += other_mh
-
-
 class PeakmemMinHashSuite:
     def setup(self):
         self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)
@@ -134,33 +66,6 @@ def peakmem_add_many(self):
 ####################
 
 
-class TimeMinAbundanceSuite(TimeMinHashSuite):
-    def setup(self):
-        TimeMinHashSuite.setup(self)
-        self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)
-
-        self.populated_mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)
-        for seq in self.sequences:
-            self.populated_mh.add_sequence(seq)
-
-    def time_get_mins_abundance(self):
-        mh = self.populated_mh
-        for i in range(GET_MINS_RANGE):
-            mh.get_mins(with_abundance=True)
-
-    def time_set_abundances(self):
-        mh = self.mh
-        mins = self.populated_mh.get_mins(with_abundance=True)
-        for i in range(SET_ABUNDANCES_RANGE):
-            mh.set_abundances(mins)
-
-    def time_set_abundances_noclear(self):
-        mh = self.mh
-        mins = self.populated_mh.get_mins(with_abundance=True)
-        for i in range(SET_ABUNDANCES_RANGE):
-            mh.set_abundances(mins, clear=False)
-
-
 class PeakmemMinAbundanceSuite(PeakmemMinHashSuite):
     def setup(self):
         PeakmemMinHashSuite.setup(self)
@@ -170,35 +75,6 @@ def setup(self):
 ####################
 
 
-class TimeZipStorageSuite:
-    def setup(self):
-        import zipfile
-
-        self.zipfile = NamedTemporaryFile()
-
-        with zipfile.ZipFile(
-            self.zipfile, mode="w", compression=zipfile.ZIP_STORED
-        ) as storage:
-            for i in range(ZIP_STORAGE_WRITE):
-                # just so we have lots of entries
-                storage.writestr(str(i), b"0")
-            # one big-ish entry
-            storage.writestr("sig1", b"9" * 1_000_000)
-
-    def time_load_from_zipstorage(self):
-        with ZipStorage(self.zipfile.name) as storage:
-            for i in range(ZIP_STORAGE_LOAD):
-                storage.load("sig1")
-
-    def time_load_small_from_zipstorage(self):
-        with ZipStorage(self.zipfile.name) as storage:
-            for i in range(ZIP_STORAGE_LOAD):
-                storage.load("99999")
-
-    def teardown(self):
-        self.zipfile.close()
-
-
 class PeakmemZipStorageSuite:
     def setup(self):
         import zipfile
diff --git a/pyproject.toml b/pyproject.toml
index ccd826f794..b6a7a27eb3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -103,6 +103,7 @@ test = [
   "pytest>=6.2.4,<8.3.0",
   "pytest-cov>=4,<6.0",
   "pytest-xdist>=3.1",
+  "pytest-benchmark>=4.0",
   "pyyaml>=6,<7",
   "diff-cover>=7.3",
   "covdefaults>=2.2.2",
diff --git a/tests/test_benchmarks.py b/tests/test_benchmarks.py
new file mode 100644
index 0000000000..cc15efc5a9
--- /dev/null
+++ b/tests/test_benchmarks.py
@@ -0,0 +1,254 @@
+"""Benchmarks for MinHash and ZipStorage.
+
+Every test times one operation through the ``benchmark`` fixture.
+"""
+
+import random
+from tempfile import NamedTemporaryFile
+
+import pytest
+
+from sourmash.sbt_storage import ZipStorage
+from sourmash.minhash import MinHash
+
+RANDOM_SEQ_SIZE = 3000
+RANDOM_SEQ_NUMBER = 300
+
+MINHASH_NUM = 500
+MINHASH_K = 21
+
+GET_MINS_RANGE = 500
+ADD_HASH_RANGE = 10_000
+ADD_MANY_RANGE = 1000
+SIMILARITY_TIMES = 500
+COUNT_COMMON_TIMES = 500
+MERGE_TIMES = 500
+COPY_TIMES = 500
+CONCAT_TIMES = 500
+SET_ABUNDANCES_RANGE = 500
+ZIP_STORAGE_WRITE = 100_000
+ZIP_STORAGE_LOAD = 20
+
+
+def load_sequences():
+    """Return 10 random DNA strings, each RANDOM_SEQ_NUMBER characters long.
+
+    Characters are drawn without replacement from a pool holding
+    RANDOM_SEQ_SIZE copies of each of "A", "C", "G" and "T".
+    """
+    sequences = []
+    for _ in range(10):
+        random_seq = random.sample(
+            "A,C,G,T".split(",") * RANDOM_SEQ_SIZE, RANDOM_SEQ_NUMBER
+        )
+        sequences.append("".join(random_seq))
+    return sequences
+
+
+@pytest.fixture
+def mh():
+    """Empty MinHash without abundance tracking."""
+    return MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False)
+
+
+@pytest.fixture
+def mh_protein():
+    """Empty protein MinHash without abundance tracking."""
+    return MinHash(
+        MINHASH_NUM, MINHASH_K, is_protein=True, track_abundance=False
+    )
+
+
+@pytest.fixture
+def sequences():
+    """Freshly generated random sequences (see load_sequences)."""
+    return load_sequences()
+
+
+@pytest.fixture
+def populated_mh(sequences):
+    """MinHash without abundance tracking, filled from ``sequences``."""
+    populated_mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False)
+    for seq in sequences:
+        populated_mh.add_sequence(seq)
+    return populated_mh
+
+
+def test_add_sequence(benchmark, mh, sequences):
+    """Time MinHash.add_sequence over every generated sequence."""
+
+    @benchmark
+    def bench():
+        for seq in sequences:
+            mh.add_sequence(seq)
+
+
+def test_add_protein(benchmark, mh_protein, sequences):
+    """Time MinHash.add_protein over every generated sequence."""
+
+    @benchmark
+    def bench():
+        for seq in sequences:
+            mh_protein.add_protein(seq)
+
+
+def test_get_mins(benchmark, populated_mh):
+    """Time GET_MINS_RANGE calls to get_mins on a populated MinHash."""
+
+    @benchmark
+    def bench():
+        for _ in range(GET_MINS_RANGE):
+            populated_mh.get_mins()
+
+
+def test_add_hash(benchmark, mh):
+    """Time add_hash for the integers 0..ADD_HASH_RANGE-1, one at a time."""
+
+    @benchmark
+    def bench():
+        for i in range(ADD_HASH_RANGE):
+            mh.add_hash(i)
+
+
+def test_add_many(benchmark, mh):
+    """Time add_many on the integers 0..ADD_MANY_RANGE-1."""
+    benchmark(mh.add_many, list(range(ADD_MANY_RANGE)))
+
+
+def test_similarity(benchmark, mh, populated_mh):
+    """Time SIMILARITY_TIMES similarity computations of mh vs populated_mh."""
+
+    @benchmark
+    def bench():
+        for _ in range(SIMILARITY_TIMES):
+            mh.similarity(populated_mh)
+
+
+def test_count_common(benchmark, mh, populated_mh):
+    """Time COUNT_COMMON_TIMES count_common calls of mh vs populated_mh."""
+
+    @benchmark
+    def bench():
+        for _ in range(COUNT_COMMON_TIMES):
+            mh.count_common(populated_mh)
+
+
+def test_merge(benchmark, mh, populated_mh):
+    """Time MERGE_TIMES merges of populated_mh into mh."""
+
+    @benchmark
+    def bench():
+        for _ in range(MERGE_TIMES):
+            mh.merge(populated_mh)
+
+
+def test_copy(benchmark, populated_mh):
+    """Time COPY_TIMES copies of a populated MinHash."""
+
+    @benchmark
+    def bench():
+        for _ in range(COPY_TIMES):
+            populated_mh.__copy__()
+
+
+def test_concat(benchmark, mh, populated_mh):
+    """Time CONCAT_TIMES in-place additions (``+=``) of populated_mh to mh."""
+
+    @benchmark
+    def bench():
+        # ``+=`` rebinds the name, so it must be declared nonlocal to
+        # refer to the fixture argument instead of a new local.
+        nonlocal mh
+        for _ in range(CONCAT_TIMES):
+            mh += populated_mh
+
+
+####################
+
+
+@pytest.fixture
+def mh_abund():
+    """Empty MinHash with abundance tracking enabled."""
+    return MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)
+
+
+@pytest.fixture
+def populated_mh_abund(sequences):
+    """MinHash with abundance tracking, filled from ``sequences``."""
+    populated = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)
+    for seq in sequences:
+        populated.add_sequence(seq)
+    return populated
+
+
+def test_get_mins_abundance(benchmark, populated_mh_abund):
+    """Time GET_MINS_RANGE calls to get_mins(with_abundance=True)."""
+
+    @benchmark
+    def bench():
+        for _ in range(GET_MINS_RANGE):
+            populated_mh_abund.get_mins(with_abundance=True)
+
+
+def test_set_abundances(benchmark, mh_abund, populated_mh_abund):
+    """Time SET_ABUNDANCES_RANGE set_abundances calls with default args."""
+
+    @benchmark
+    def bench():
+        mins = populated_mh_abund.get_mins(with_abundance=True)
+        for _ in range(SET_ABUNDANCES_RANGE):
+            mh_abund.set_abundances(mins)
+
+
+def test_set_abundances_noclear(benchmark, mh_abund, populated_mh_abund):
+    """Time SET_ABUNDANCES_RANGE set_abundances calls with clear=False."""
+
+    @benchmark
+    def bench():
+        mins = populated_mh_abund.get_mins(with_abundance=True)
+        for _ in range(SET_ABUNDANCES_RANGE):
+            mh_abund.set_abundances(mins, clear=False)
+
+
+####################
+
+
+@pytest.fixture
+def zipstore():
+    """Yield a temporary zip archive with many tiny entries plus one big one.
+
+    The temporary file is closed when the fixture is torn down.
+    """
+    import zipfile
+
+    with NamedTemporaryFile() as zf:
+        with zipfile.ZipFile(
+            zf, mode="w", compression=zipfile.ZIP_STORED
+        ) as storage:
+            for i in range(ZIP_STORAGE_WRITE):
+                # just so we have lots of entries
+                storage.writestr(str(i), b"0")
+            # one big-ish entry
+            storage.writestr("sig1", b"9" * 1_000_000)
+
+        yield zf
+
+
+def test_load_from_zipstorage(benchmark, zipstore):
+    """Time ZIP_STORAGE_LOAD loads of the large "sig1" entry."""
+
+    @benchmark
+    def bench():
+        with ZipStorage(zipstore.name) as storage:
+            for _ in range(ZIP_STORAGE_LOAD):
+                storage.load("sig1")
+
+
+def test_load_small_from_zipstorage(benchmark, zipstore):
+    """Time ZIP_STORAGE_LOAD loads of "99999", one of the one-byte entries."""
+
+    @benchmark
+    def bench():
+        with ZipStorage(zipstore.name) as storage:
+            for _ in range(ZIP_STORAGE_LOAD):
+                storage.load("99999")
diff --git a/tox.ini b/tox.ini
index ecf66a2bcd..c1237267da 100644
--- a/tox.ini
+++ b/tox.ini
@@ -111,6 +111,34 @@ commands =
     asv machine --yes
     asv continuous latest HEAD {posargs}
 
+[testenv:benchmarks]
+description = run pytest-benchmark for benchmarking
+changedir = {toxinidir}
+commands =
+    pytest \
+      --cov "{envsitepackagesdir}/sourmash" \
+      --cov-config "{toxinidir}/tox.ini" \
+      --cov-report= \
+      --junitxml {toxworkdir}/junit.benchmarks.xml \
+      --benchmark-only \
+      -n 0 \
+      {posargs:tests}
+
+[testenv:codspeed]
+description = run codspeed for benchmarking
+deps =
+    pytest-codspeed
+changedir = {toxinidir}
+commands =
+    pytest \
+      --cov "{envsitepackagesdir}/sourmash" \
+      --cov-config "{toxinidir}/tox.ini" \
+      --cov-report= \
+      --junitxml {toxworkdir}/junit.codspeed.xml \
+      --codspeed \
+      -k benchmarks \
+      {posargs:tests}
+
 [testenv:docs]
 description = invoke sphinx-build to build the HTML docs
 basepython = python3.10
@@ -180,7 +208,7 @@ commands =
     coverage xml -i -o {toxworkdir}/coverage.xml
     coverage html -i -d {toxworkdir}/htmlcov
     diff-cover --compare-branch {env:DIFF_AGAINST:origin/latest} {toxworkdir}/coverage.xml
-depends = py312, py311, py310, pypy3
+depends = py312, py311, py310, pypy3, codspeed
 pass_env = {[testenv]pass_env}
     DIFF_AGAINST
 set_env = COVERAGE_FILE={toxworkdir}/.coverage
@@ -233,7 +261,7 @@ source = src/sourmash/
 python =
     3.10: py310, docs, package_description, coverage
     3.11: py311, coverage
-    3.12: py312, coverage
+    3.12: py312, coverage, codspeed
 
 [flake8]
 max-complexity = 22