Skip to content

Commit

Permalink
Wrapper methods to combine multiple experiments (#83)
Browse files Browse the repository at this point in the history
- Add wrapper methods to combine Summarized and RangedSummarized by rows or columns.
- Implement getters and setters to access and modify an assay.
  • Loading branch information
jkanche authored Jan 2, 2025
1 parent f24d114 commit c8faec9
Show file tree
Hide file tree
Showing 8 changed files with 204 additions and 22 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Changelog

## Version 0.5.1

- Add wrapper methods to combine `SummarizedExperiment` and `RangedSummarizedExperiment` objects by rows or columns.
- Add `get_assay()` and `set_assay()` to access and modify an individual assay; `assay()` is kept as an alias of `get_assay()`.

## Version 0.5.0

- chore: Remove Python 3.8 (EOL)
Expand Down
62 changes: 49 additions & 13 deletions src/summarizedexperiment/BaseSE.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,8 +250,8 @@ def __deepcopy__(self, memo=None, _nil=[]):
_rows_copy = deepcopy(self._rows)
_cols_copy = deepcopy(self._cols)
_metadata_copy = deepcopy(self.metadata)
_row_names_copy = deepcopy(self._row_names)
_col_names_copy = deepcopy(self._column_names)
_row_names_copy = None if self._row_names is None else deepcopy(self._row_names)
_col_names_copy = None if self._column_names is None else deepcopy(self._column_names)

current_class_const = type(self)
return current_class_const(
Expand All @@ -270,12 +270,12 @@ def __copy__(self):
"""
current_class_const = type(self)
return current_class_const(
assays=self._assays,
row_data=self._rows,
column_data=self._cols,
row_names=self._row_names,
column_names=self._column_names,
metadata=self._metadata,
assays=self._assays.copy(),
row_data=self._rows.__copy__(),
column_data=self._cols.__copy__(),
row_names=None if self._row_names is None else self._row_names.copy(),
column_names=None if self._column_names is None else self._column_names.copy(),
metadata=self._metadata.copy(),
)

def copy(self):
Expand Down Expand Up @@ -924,7 +924,7 @@ def assay_names(self, names: List[str]):
######>> assay getters <<#######
################################

def assay(self, assay: Union[int, str]) -> Any:
def get_assay(self, assay: Union[int, str]) -> Any:
"""Convenience method to access an :py:attr:`~summarizedexperiment.BaseSE.BaseSE.assays` by name or index.
Args:
Expand All @@ -944,18 +944,54 @@ def assay(self, assay: Union[int, str]) -> Any:
if assay < 0:
raise IndexError("Index cannot be negative.")

if assay > len(self.assay_names):
if assay > len(self.get_assay_names()):
raise IndexError("Index greater than the number of assays.")

return self.assays[self.assay_names[assay]]
return self.assays[self.get_assay_names()[assay]]
elif isinstance(assay, str):
if assay not in self.assays:
if assay not in self._assays:
raise AttributeError(f"Assay: {assay} does not exist.")

return self.assays[assay]
return self._assays[assay]

raise TypeError(f"'assay' must be a string or integer, provided '{type(assay)}'.")

def assay(self, assay: Union[int, str]) -> Any:
    """Alias for :py:meth:`~get_assay`, kept for backwards compatibility.

    Args:
        assay:
            Name or index of the assay to access; forwarded unchanged
            to ``get_assay``.

    Returns:
        Whatever ``get_assay`` returns for ``assay``.
    """
    return self.get_assay(assay)

def set_assay(self, name: str, assay: Any, in_place: bool = False) -> "BaseSE":
    """Add or replace an entry in :py:attr:`~summarizedexperiment.BaseSE.BaseSE.assays`.

    Args:
        name:
            New or existing assay name.

        assay:
            A 2-dimensional matrix represented as either
            :py:class:`~numpy.ndarray` or :py:class:`~scipy.sparse.spmatrix`.

            Alternatively, you may use any 2-dimensional matrix that has
            the ``shape`` property and implements the slice operation
            using the ``__getitem__`` dunder method.

            Dimensions of the matrix must match the shape of the
            current experiment (number of rows, number of columns).

        in_place:
            Whether to modify the ``BaseSE`` in place.

    Returns:
        A modified ``BaseSE`` object, either as a copy of the original
        or as a reference to the (in-place-modified) original.

    Raises:
        ValueError:
            If ``assay.shape`` differs from the shape of the experiment.
    """
    # Validate before obtaining the output object so a rejected assay never
    # triggers a copy or touches the existing assays.
    if assay.shape != self.shape:
        raise ValueError("Provided assay does not match the dimensions of the experiment.")

    output = self._define_output(in_place)
    output._assays[name] = assay
    return output

##########################
######>> slicers <<#######
##########################
Expand Down
38 changes: 29 additions & 9 deletions src/summarizedexperiment/RangedSummarizedExperiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,8 @@ def __deepcopy__(self, memo=None, _nil=[]):
_rows_copy = deepcopy(self._rows)
_rowranges_copy = deepcopy(self._row_ranges)
_cols_copy = deepcopy(self._cols)
_row_names_copy = deepcopy(self._row_names)
_col_names_copy = deepcopy(self._column_names)
_row_names_copy = None if self._row_names is None else deepcopy(self._row_names)
_col_names_copy = None if self._column_names is None else deepcopy(self._column_names)
_metadata_copy = deepcopy(self.metadata)

current_class_const = type(self)
Expand All @@ -206,13 +206,13 @@ def __copy__(self):
"""
current_class_const = type(self)
return current_class_const(
assays=self._assays,
row_ranges=self._row_ranges,
row_data=self._rows,
column_data=self._cols,
row_names=self._row_names,
column_names=self._column_names,
metadata=self._metadata,
assays=self._assays.copy(),
row_ranges=self._row_ranges.__copy__(),
row_data=self._rows.__copy__(),
column_data=self._cols.__copy__(),
row_names=None if self._row_names is None else self._row_names.copy(),
column_names=None if self._column_names is None else self._column_names.copy(),
metadata=self._metadata.copy(),
)

def copy(self):
Expand Down Expand Up @@ -920,6 +920,26 @@ def sort(self, decreasing: bool = False, in_place: bool = False) -> "RangedSumma
output = self._define_output(in_place=in_place)
return output[list(_order),]

############################
######>> combine ops <<#####
############################

def relaxed_combine_rows(self, *other) -> "RangedSummarizedExperiment":
    """Wrapper around :py:func:`~relaxed_combine_rows`.

    Args:
        *other:
            Additional objects to combine with this one; forwarded
            unchanged, after ``self``, to ``relaxed_combine_rows``.

    Returns:
        The result of ``relaxed_combine_rows(self, *other)``.
    """
    # The bare name is looked up in module scope from inside a method body,
    # so this calls the function of the same name rather than this method.
    return relaxed_combine_rows(self, *other)

def relaxed_combine_columns(self, *other) -> "RangedSummarizedExperiment":
    """Wrapper around :py:func:`~relaxed_combine_columns`.

    Args:
        *other:
            Additional objects to combine with this one; forwarded
            unchanged, after ``self``, to ``relaxed_combine_columns``.

    Returns:
        The result of ``relaxed_combine_columns(self, *other)``.
    """
    # The bare name is looked up in module scope from inside a method body,
    # so this calls the function of the same name rather than this method.
    return relaxed_combine_columns(self, *other)

def combine_rows(self, *other) -> "RangedSummarizedExperiment":
    """Wrapper around :py:func:`~biocutils.combine_rows`.

    Args:
        *other:
            Additional objects to combine with this one; forwarded
            unchanged, after ``self``, to ``combine_rows``.

    Returns:
        The result of ``combine_rows(self, *other)``.
    """
    # The bare name is looked up in module scope from inside a method body,
    # so this calls the function of the same name rather than this method.
    return combine_rows(self, *other)

def combine_columns(self, *other) -> "RangedSummarizedExperiment":
    """Wrapper around :py:func:`~biocutils.combine_columns`.

    Args:
        *other:
            Additional objects to combine with this one; forwarded
            unchanged, after ``self``, to ``combine_columns``.

    Returns:
        The result of ``combine_columns(self, *other)``.
    """
    # The bare name is looked up in module scope from inside a method body,
    # so this calls the function of the same name rather than this method.
    return combine_columns(self, *other)


############################
######>> combine ops <<#####
Expand Down
20 changes: 20 additions & 0 deletions src/summarizedexperiment/SummarizedExperiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,26 @@ def __init__(
validate=validate,
)

############################
######>> combine ops <<#####
############################

def relaxed_combine_rows(self, *other) -> "SummarizedExperiment":
    """Wrapper around :py:func:`~relaxed_combine_rows`.

    Args:
        *other:
            Additional objects to combine with this one; forwarded
            unchanged, after ``self``, to ``relaxed_combine_rows``.

    Returns:
        The result of ``relaxed_combine_rows(self, *other)``.
    """
    # The bare name is looked up in module scope from inside a method body,
    # so this calls the function of the same name rather than this method.
    return relaxed_combine_rows(self, *other)

def relaxed_combine_columns(self, *other) -> "SummarizedExperiment":
    """Wrapper around :py:func:`~relaxed_combine_columns`.

    Args:
        *other:
            Additional objects to combine with this one; forwarded
            unchanged, after ``self``, to ``relaxed_combine_columns``.

    Returns:
        The result of ``relaxed_combine_columns(self, *other)``.
    """
    # The bare name is looked up in module scope from inside a method body,
    # so this calls the function of the same name rather than this method.
    return relaxed_combine_columns(self, *other)

def combine_rows(self, *other) -> "SummarizedExperiment":
    """Wrapper around :py:func:`~biocutils.combine_rows`.

    Args:
        *other:
            Additional objects to combine with this one; forwarded
            unchanged, after ``self``, to ``combine_rows``.

    Returns:
        The result of ``combine_rows(self, *other)``.
    """
    # The bare name is looked up in module scope from inside a method body,
    # so this calls the function of the same name rather than this method.
    return combine_rows(self, *other)

def combine_columns(self, *other) -> "SummarizedExperiment":
    """Wrapper around :py:func:`~biocutils.combine_columns`.

    Args:
        *other:
            Additional objects to combine with this one; forwarded
            unchanged, after ``self``, to ``combine_columns``.

    Returns:
        The result of ``combine_columns(self, *other)``.
    """
    # The bare name is looked up in module scope from inside a method body,
    # so this calls the function of the same name rather than this method.
    return combine_columns(self, *other)


############################
######>> combine ops <<#####
Expand Down
21 changes: 21 additions & 0 deletions tests/test_RSE_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,3 +198,24 @@ def test_RSE_sort_order():
res = tse.sort(decreasing=True)
assert res is not None
assert len(res.row_ranges) == len(tse.row_ranges)

def test_SE_assay_getters_and_setters():
    """Exercise ``assay``, ``get_assay`` and ``set_assay`` on a ``RangedSummarizedExperiment``."""
    tse = RangedSummarizedExperiment(
        assays={"counts": counts}, row_ranges=gr, column_data=col_data
    )

    assert tse is not None
    assert isinstance(tse, RangedSummarizedExperiment)

    assert tse.assay(0) is not None

    # With in_place=False the new assay must only appear on the returned object.
    new_tse = tse.set_assay("new_counts", assay=np.random.rand(nrows, ncols), in_place=False)
    assert new_tse.get_assay_names() != tse.get_assay_names()
    # get_assay raises AttributeError for an unknown assay name; asserting the
    # specific type keeps unrelated failures from being accepted as a pass.
    with pytest.raises(AttributeError):
        tse.get_assay("new_counts")
    assert new_tse.get_assay("new_counts") is not None

    # With in_place=True the original object itself gains the assay.
    tse.set_assay("new_counts", assay=np.random.rand(nrows, ncols), in_place=True)
    assert new_tse.get_assay_names() == tse.get_assay_names()
    assert tse.get_assay("new_counts") is not None
    assert new_tse.get_assay("new_counts") is not None
28 changes: 28 additions & 0 deletions tests/test_SE_combine_cols.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,15 @@ def test_SE_relaxed_combine_cols(summarized_experiments):
assert list(combined.row_data.column_names) == ["A"]
assert list(combined.column_data.column_names) == ["A", "B"]

combined2 = summarized_experiments.se_unnamed.relaxed_combine_columns(summarized_experiments.se_unnamed_2)

assert combined2 is not None
assert isinstance(combined2, SummarizedExperiment)
assert combined2.shape == (100, 20)
assert set(combined2.assay_names).issubset(["counts", "normalized"])
assert list(combined2.row_data.column_names) == ["A"]
assert list(combined2.column_data.column_names) == ["A", "B"]


def test_SE_combine_cols_with_names_mixed(summarized_experiments):
"""Test case to verify combine_cols() when the inputs have unnamed rows."""
Expand Down Expand Up @@ -72,6 +81,25 @@ def test_SE_combine_cols_with_names_mixed(summarized_experiments):
assert combined.column_names is not None
assert len(combined.column_names) == 6

combined2 = summarized_experiments.se1.relaxed_combine_columns(
summarized_experiments.se3
)

assert combined2 is not None
assert isinstance(combined2, SummarizedExperiment)
assert combined2.shape == (3, 6)
assert set(combined2.assay_names).issubset(["counts", "lognorm"])
assert list(combined2.row_data.column_names) == ["seqnames", "start", "end"]
assert list(combined2.column_data.column_names) == [
"sample",
"disease",
"doublet_score",
]
assert combined2.row_names is not None
assert len(combined2.row_names) == 3
assert combined2.column_names is not None
assert len(combined2.column_names) == 6


def test_SE_both_combine_cols_with_names(summarized_experiments):
"""Test case to verify combine_cols() when the inputs have unnamed rows."""
Expand Down
30 changes: 30 additions & 0 deletions tests/test_SE_combine_rows.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,16 @@ def test_SE_relaxed_combine_rows(summarized_experiments):
assert list(combined.row_data.column_names) == ["A", "B"]
assert list(combined.column_data.column_names) == ["A"]

combined2 = summarized_experiments.se_unnamed.relaxed_combine_rows(
summarized_experiments.se_unnamed_2
)

assert combined2 is not None
assert isinstance(combined2, SummarizedExperiment)
assert combined2.shape == (200, 10)
assert set(combined2.assay_names).issubset(["counts", "normalized"])
assert list(combined2.row_data.column_names) == ["A", "B"]
assert list(combined2.column_data.column_names) == ["A"]

def test_SE_combine_rows_with_names_mixed(summarized_experiments):
combined = biocutils.combine_rows(
Expand Down Expand Up @@ -68,6 +78,26 @@ def test_SE_combine_rows_with_names_mixed(summarized_experiments):
assert combined.column_names is not None
assert len(combined.column_names) == 3

combined2 = summarized_experiments.se1.relaxed_combine_rows(
summarized_experiments.se3
)

assert combined2 is not None
assert isinstance(combined2, SummarizedExperiment)
assert combined2.shape == (6, 3)
assert set(combined2.assay_names).issubset(["counts", "lognorm"])
assert list(combined2.row_data.column_names) == ["seqnames", "start", "end"]
assert list(combined2.column_data.column_names) == [
"sample",
"disease",
"doublet_score",
]
assert combined2.row_names is not None
assert len(combined2.row_names) == 6
assert combined2.column_names is not None
assert len(combined2.column_names) == 3



def test_SE_both_combine_rows_with_names(summarized_experiments):
combined = biocutils.combine_rows(
Expand Down
22 changes: 22 additions & 0 deletions tests/test_SE_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from biocframe import BiocFrame
import numpy as np
from summarizedexperiment.SummarizedExperiment import SummarizedExperiment
import pytest

__author__ = "jkanche"
__copyright__ = "jkanche"
Expand Down Expand Up @@ -103,3 +104,24 @@ def test_SE_assay():

assert tse.assay("counts") is not None
assert tse.assay(0) is not None

def test_SE_assay_getters_and_setters():
    """Exercise ``assay``, ``get_assay`` and ``set_assay`` on a ``SummarizedExperiment``."""
    tse = SummarizedExperiment(
        assays={"counts": counts}, row_data=row_data, column_data=col_data
    )

    assert tse is not None
    assert isinstance(tse, SummarizedExperiment)

    assert tse.assay(0) is not None

    # With in_place=False the new assay must only appear on the returned object.
    new_tse = tse.set_assay("new_counts", assay=np.random.rand(nrows, ncols), in_place=False)
    assert new_tse.get_assay_names() != tse.get_assay_names()
    # get_assay raises AttributeError for an unknown assay name; asserting the
    # specific type keeps unrelated failures from being accepted as a pass.
    with pytest.raises(AttributeError):
        tse.get_assay("new_counts")
    assert new_tse.get_assay("new_counts") is not None

    # With in_place=True the original object itself gains the assay.
    tse.set_assay("new_counts", assay=np.random.rand(nrows, ncols), in_place=True)
    assert new_tse.get_assay_names() == tse.get_assay_names()
    assert tse.get_assay("new_counts") is not None
    assert new_tse.get_assay("new_counts") is not None

0 comments on commit c8faec9

Please sign in to comment.