Commit
port ASV time benchmarks to pytest-benchmark, keep memory usage ones for now
luizirber committed Jun 30, 2024
1 parent 7f4cffa commit cbce8b3
Showing 4 changed files with 224 additions and 126 deletions.
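The shape of the port, condensed from the diff below into a minimal sketch (class name, constants, and flags are taken from the changed files, nothing else is assumed): an ASV suite method becomes a plain pytest function that receives the benchmark fixture from pytest-benchmark and hands it the work to time.

from sourmash.minhash import MinHash

MINHASH_NUM = 500
MINHASH_K = 21
ADD_HASH_RANGE = 10_000


# ASV style (deleted in benchmarks/benchmarks.py): a suite class with setup()
# and time_* methods, discovered and timed by `asv run`.
class TimeMinHashSuite:
    def setup(self):
        self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False)

    def time_add_hash(self):
        for i in range(ADD_HASH_RANGE):
            self.mh.add_hash(i)


# pytest-benchmark style (added in tests/test_benchmarks.py): the benchmark
# fixture runs and times whatever it is handed; run with pytest --benchmark-only.
def test_add_hash(benchmark):
    mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False)

    @benchmark
    def bench():
        for i in range(ADD_HASH_RANGE):
            mh.add_hash(i)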
124 changes: 0 additions & 124 deletions benchmarks/benchmarks.py
@@ -33,74 +33,6 @@ def load_sequences():
return sequences


class TimeMinHashSuite:
def setup(self):
self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False)
self.protein_mh = MinHash(
MINHASH_NUM, MINHASH_K, is_protein=True, track_abundance=False
)
self.sequences = load_sequences()

self.populated_mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False)
for seq in self.sequences:
self.populated_mh.add_sequence(seq)

def time_add_sequence(self):
mh = self.mh
sequences = self.sequences
for seq in sequences:
mh.add_sequence(seq)

def time_add_protein(self):
mh = self.protein_mh
sequences = self.sequences
for seq in sequences:
mh.add_protein(seq)

def time_get_mins(self):
mh = self.populated_mh
for i in range(GET_MINS_RANGE):
mh.get_mins()

def time_add_hash(self):
mh = self.mh
for i in range(ADD_HASH_RANGE):
mh.add_hash(i)

def time_add_many(self):
mh = self.mh
mh.add_many(list(range(ADD_MANY_RANGE)))

def time_similarity(self):
mh = self.mh
other_mh = self.populated_mh
for i in range(SIMILARITY_TIMES):
mh.similarity(other_mh)

def time_count_common(self):
mh = self.mh
other_mh = self.populated_mh
for i in range(COUNT_COMMON_TIMES):
mh.count_common(other_mh)

def time_merge(self):
mh = self.mh
other_mh = self.populated_mh
for i in range(MERGE_TIMES):
mh.merge(other_mh)

def time_copy(self):
mh = self.populated_mh
for i in range(COPY_TIMES):
mh.__copy__()

def time_concat(self):
mh = self.mh
other_mh = self.populated_mh
for i in range(CONCAT_TIMES):
mh += other_mh


class PeakmemMinHashSuite:
def setup(self):
self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)
@@ -134,33 +66,6 @@ def peakmem_add_many(self):
####################


class TimeMinAbundanceSuite(TimeMinHashSuite):
def setup(self):
TimeMinHashSuite.setup(self)
self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)

self.populated_mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)
for seq in self.sequences:
self.populated_mh.add_sequence(seq)

def time_get_mins_abundance(self):
mh = self.populated_mh
for i in range(GET_MINS_RANGE):
mh.get_mins(with_abundance=True)

def time_set_abundances(self):
mh = self.mh
mins = self.populated_mh.get_mins(with_abundance=True)
for i in range(SET_ABUNDANCES_RANGE):
mh.set_abundances(mins)

def time_set_abundances_noclear(self):
mh = self.mh
mins = self.populated_mh.get_mins(with_abundance=True)
for i in range(SET_ABUNDANCES_RANGE):
mh.set_abundances(mins, clear=False)


class PeakmemMinAbundanceSuite(PeakmemMinHashSuite):
def setup(self):
PeakmemMinHashSuite.setup(self)
@@ -170,35 +75,6 @@ def setup(self):
####################


class TimeZipStorageSuite:
def setup(self):
import zipfile

self.zipfile = NamedTemporaryFile()

with zipfile.ZipFile(
self.zipfile, mode="w", compression=zipfile.ZIP_STORED
) as storage:
for i in range(ZIP_STORAGE_WRITE):
# just so we have lots of entries
storage.writestr(str(i), b"0")
# one big-ish entry
storage.writestr("sig1", b"9" * 1_000_000)

def time_load_from_zipstorage(self):
with ZipStorage(self.zipfile.name) as storage:
for i in range(ZIP_STORAGE_LOAD):
storage.load("sig1")

def time_load_small_from_zipstorage(self):
with ZipStorage(self.zipfile.name) as storage:
for i in range(ZIP_STORAGE_LOAD):
storage.load("99999")

def teardown(self):
self.zipfile.close()


class PeakmemZipStorageSuite:
def setup(self):
import zipfile
1 change: 1 addition & 0 deletions pyproject.toml
@@ -103,6 +103,7 @@ test = [
"pytest>=6.2.4,<8.3.0",
"pytest-cov>=4,<6.0",
"pytest-xdist>=3.1",
"pytest-benchmark>=4.0",
"pyyaml>=6,<7",
"diff-cover>=7.3",
"covdefaults>=2.2.2",
193 changes: 193 additions & 0 deletions tests/test_benchmarks.py
@@ -0,0 +1,193 @@
import random
from tempfile import NamedTemporaryFile

import pytest

from sourmash.sbt_storage import ZipStorage
from sourmash.minhash import MinHash

RANDOM_SEQ_SIZE = 3000
RANDOM_SEQ_NUMBER = 300

MINHASH_NUM = 500
MINHASH_K = 21

GET_MINS_RANGE = 500
ADD_HASH_RANGE = 10_000
ADD_MANY_RANGE = 1000
SIMILARITY_TIMES = 500
COUNT_COMMON_TIMES = 500
MERGE_TIMES = 500
COPY_TIMES = 500
CONCAT_TIMES = 500
SET_ABUNDANCES_RANGE = 500
ZIP_STORAGE_WRITE = 100_000
ZIP_STORAGE_LOAD = 20


def load_sequences():
sequences = []
for _ in range(10):
random_seq = random.sample(
"A,C,G,T".split(",") * RANDOM_SEQ_SIZE, RANDOM_SEQ_NUMBER
)
sequences.append("".join(random_seq))
return sequences


@pytest.fixture
def mh():
return MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False)


@pytest.fixture
def mh_protein():
return MinHash(
MINHASH_NUM, MINHASH_K, is_protein=True, track_abundance=False
)


@pytest.fixture
def sequences():
return load_sequences()


@pytest.fixture
def populated_mh(sequences):
populated_mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False)
for seq in sequences:
populated_mh.add_sequence(seq)
return populated_mh


def test_add_sequence(benchmark, mh, sequences):
@benchmark
def bench():
for seq in sequences:
mh.add_sequence(seq)


def test_add_protein(benchmark, mh_protein, sequences):
@benchmark
def bench():
for seq in sequences:
mh_protein.add_protein(seq)


def test_get_mins(benchmark, populated_mh):
@benchmark
def bench():
for _ in range(GET_MINS_RANGE):
populated_mh.get_mins()


def test_add_hash(benchmark, mh):
@benchmark
def bench():
for i in range(ADD_HASH_RANGE):
mh.add_hash(i)


def test_add_many(benchmark, mh):
benchmark(mh.add_many, list(range(ADD_MANY_RANGE)))


def test_similarity(benchmark, mh, populated_mh):
@benchmark
def bench():
for _ in range(SIMILARITY_TIMES):
mh.similarity(populated_mh)


def test_count_common(benchmark, mh, populated_mh):
@benchmark
def bench():
for _ in range(COUNT_COMMON_TIMES):
mh.count_common(populated_mh)


def test_merge(benchmark, mh, populated_mh):
@benchmark
def bench():
for i in range(MERGE_TIMES):
mh.merge(populated_mh)


def test_copy(benchmark, populated_mh):
@benchmark
def bench():
for i in range(COPY_TIMES):
populated_mh.__copy__()


def test_concat(benchmark, mh, populated_mh):
@benchmark
def bench():
nonlocal mh
for _ in range(CONCAT_TIMES):
mh += populated_mh

####################


# Abundance-tracking benchmarks, ported from the ASV TimeMinAbundanceSuite
# (the fixture names below are new in this port).


@pytest.fixture
def mh_abund():
    return MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)


@pytest.fixture
def populated_mh_abund(sequences):
    populated_mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)
    for seq in sequences:
        populated_mh.add_sequence(seq)
    return populated_mh


def test_get_mins_abundance(benchmark, populated_mh_abund):
    @benchmark
    def bench():
        for _ in range(GET_MINS_RANGE):
            populated_mh_abund.get_mins(with_abundance=True)


def test_set_abundances(benchmark, mh_abund, populated_mh_abund):
    mins = populated_mh_abund.get_mins(with_abundance=True)

    @benchmark
    def bench():
        for _ in range(SET_ABUNDANCES_RANGE):
            mh_abund.set_abundances(mins)


def test_set_abundances_noclear(benchmark, mh_abund, populated_mh_abund):
    mins = populated_mh_abund.get_mins(with_abundance=True)

    @benchmark
    def bench():
        for _ in range(SET_ABUNDANCES_RANGE):
            mh_abund.set_abundances(mins, clear=False)


####################


@pytest.fixture
def zipstore():
import zipfile

zf = NamedTemporaryFile()

with zipfile.ZipFile(
zf, mode="w", compression=zipfile.ZIP_STORED
) as storage:
for i in range(ZIP_STORAGE_WRITE):
# just so we have lots of entries
storage.writestr(str(i), b"0")
# one big-ish entry
storage.writestr("sig1", b"9" * 1_000_000)

yield zf

zf.close()


def test_load_from_zipstorage(benchmark, zipstore):
@benchmark
def bench():
with ZipStorage(zipstore.name) as storage:
for _ in range(ZIP_STORAGE_LOAD):
storage.load("sig1")


def test_load_small_from_zipstorage(benchmark, zipstore):
@benchmark
def bench():
with ZipStorage(zipstore.name) as storage:
for _ in range(ZIP_STORAGE_LOAD):
storage.load("99999")
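A note on the two invocation styles in the file above: the pytest-benchmark fixture times a callable either when called directly, as test_add_many does with benchmark(mh.add_many, list(range(ADD_MANY_RANGE))), or when applied as a decorator, in which case the decorated function is executed and timed immediately. The decorator form keeps the multi-statement loop bodies readable, which is why most of the ported tests use it.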
32 changes: 30 additions & 2 deletions tox.ini
@@ -111,6 +111,34 @@ commands =
asv machine --yes
asv continuous latest HEAD {posargs}

[testenv:benchmarks]
description = run pytest-benchmark for benchmarking
changedir = {toxinidir}
commands =
pytest \
--cov "{envsitepackagesdir}/sourmash" \
--cov-config "{toxinidir}/tox.ini" \
--cov-report= \
--junitxml {toxworkdir}/junit.benchmarks.xml \
--benchmark-only \
-n 0 \
{posargs:tests}

[testenv:codspeed]
description = run codspeed for benchmarking
deps =
pytest-codspeed
changedir = {toxinidir}
commands =
pytest \
--cov "{envsitepackagesdir}/sourmash" \
--cov-config "{toxinidir}/tox.ini" \
--cov-report= \
--junitxml {toxworkdir}/junit.codspeed.xml \
--codspeed \
-k benchmarks \
{posargs:tests}
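
Both environments can then be run locally through tox (assuming a standard tox installation), for example:

tox -e benchmarks    # pytest-benchmark timings only (--benchmark-only, xdist disabled via -n 0)
tox -e codspeed      # the same tests collected under pytest-codspeed (-k benchmarks)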

[testenv:docs]
description = invoke sphinx-build to build the HTML docs
basepython = python3.10
@@ -180,7 +208,7 @@ commands =
coverage xml -i -o {toxworkdir}/coverage.xml
coverage html -i -d {toxworkdir}/htmlcov
diff-cover --compare-branch {env:DIFF_AGAINST:origin/latest} {toxworkdir}/coverage.xml
depends = py312, py311, py310, pypy3
depends = py312, py311, py310, pypy3, codspeed
pass_env = {[testenv]pass_env}
DIFF_AGAINST
set_env = COVERAGE_FILE={toxworkdir}/.coverage
@@ -233,7 +261,7 @@ source = src/sourmash/
python =
3.10: py310, docs, package_description, coverage
3.11: py311, coverage
3.12: py312, coverage
3.12: py312, coverage, codspeed
[flake8]
max-complexity = 22
