From e9e0eb6320619efa0ce547004868a88dab515de1 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Wed, 29 Jan 2025 21:53:19 -0800 Subject: [PATCH] Remove the confusing expansion of feature intervals (#18) --- CHANGELOG.md | 3 +- src/genomicarrays/GenomicArrayDataset.py | 13 ++++---- src/genomicarrays/GenomicArrayDatasetSlice.py | 32 +++++++++---------- tests/test_query.py | 12 ++++--- 4 files changed, 33 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7aad684..c0c01fd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,9 @@ # Changelog -## Version 0.2.1 +## Version 0.2.1 - 0.2.2 - The aggregate function is expected to return either a scalar value or a 1-dimensional NumPy ndarray. If the later, users need to specify the expected dimension of the summarization. All values will be flattenned eventually. +- Remove the expansion of feature intervals to conform to the output length; the unexpanded slice is now incompatible with coercions to anndata and summarized experiments, so those coercions (`to_anndata`, `to_rangedsummarizedexperiment`) have been removed. 
## Version 0.2.0 diff --git a/src/genomicarrays/GenomicArrayDataset.py b/src/genomicarrays/GenomicArrayDataset.py index b6a7850..2f76d28 100644 --- a/src/genomicarrays/GenomicArrayDataset.py +++ b/src/genomicarrays/GenomicArrayDataset.py @@ -287,22 +287,23 @@ def get_slice( if not isinstance(feature_subset, (int, slice)): raise TypeError("feature indices must be continous; either a 'slice' or 'int' index.") + _fsubset = self.get_feature_subset(feature_subset) start_findex = _fsubset["genarr_feature_start_index"].astype(int).min() end_findex = _fsubset["genarr_feature_end_index"].astype(int).max() # expand intervals - final_rows = [] - for row in _fsubset.itertuples(): - for i, _ in enumerate(range(int(row.genarr_feature_start_index), int(row.genarr_feature_end_index))): - final_rows.append(row._replace(starts=i + row.starts, ends=i + row.starts + 1)) - _feature_df = pd.DataFrame(final_rows) + # final_rows = [] + # for row in _fsubset.itertuples(): + # for i, _ in enumerate(range(int(row.genarr_feature_start_index), int(row.genarr_feature_end_index))): + # final_rows.append(row._replace(starts=i + row.starts, ends=i + row.starts + 1)) + # _feature_df = pd.DataFrame(final_rows) _msubset = self.get_matrix_subset((list(range(start_findex, end_findex)), _sample_indices)) return GenomicArrayDatasetSlice( _ssubset, - _feature_df, + _fsubset, _msubset, ) diff --git a/src/genomicarrays/GenomicArrayDatasetSlice.py b/src/genomicarrays/GenomicArrayDatasetSlice.py index 0c284ea..8b58aa1 100644 --- a/src/genomicarrays/GenomicArrayDatasetSlice.py +++ b/src/genomicarrays/GenomicArrayDatasetSlice.py @@ -51,22 +51,22 @@ class GenomicArrayDatasetSlice: ## Interop #### - def to_anndata(self): - """Convert the realized slice to :py:class:`~anndata.AnnData`.""" - return anndata.AnnData( - layers={"matrix": self.matrix.transpose()}, - obs=self.sample_metadata, - var=self.feature_annotation, - ) - - def to_rangedsummarizedexperiment(self): - """Convert the realized slice to - 
:py:class:`~summarizedexperiment.RangedSummarizedExperiment.RangedSummarizedExperiment`.""" - return se.RangedSummarizedExperiment( - assays={"matrix": self.matrix}, - row_ranges=gr.GenomicRanges.from_pandas(self.feature_annotation), - column_data=self.sample_metadata, - ) + # def to_anndata(self): + # """Convert the realized slice to :py:class:`~anndata.AnnData`.""" + # return anndata.AnnData( + # layers={"matrix": self.matrix.transpose()}, + # obs=self.sample_metadata, + # var=self.feature_annotation, + # ) + + # def to_rangedsummarizedexperiment(self): + # """Convert the realized slice to + # :py:class:`~summarizedexperiment.RangedSummarizedExperiment.RangedSummarizedExperiment`.""" + # return se.RangedSummarizedExperiment( + # assays={"matrix": self.matrix}, + # row_ranges=gr.GenomicRanges.from_pandas(self.feature_annotation), + # column_data=self.sample_metadata, + # ) #### ## Misc methods. diff --git a/tests/test_query.py b/tests/test_query.py index 1f87c56..3b4ea65 100644 --- a/tests/test_query.py +++ b/tests/test_query.py @@ -93,10 +93,12 @@ def test_query_agg(): result1 = cd.get_slice(slice(0, 5), slice(None)) assert result1 is not None + assert len(result1.feature_annotation) == 6 + assert len(result1.sample_metadata) == 2 assert result1.matrix.shape == (6, 2) - assert result1.to_anndata() is not None - assert result1.to_rangedsummarizedexperiment() is not None + # assert result1.to_anndata() is not None + # assert result1.to_rangedsummarizedexperiment() is not None def test_query_noagg(): @@ -165,7 +167,9 @@ def test_query_noagg(): result1 = cd.get_slice(slice(0, 5), slice(None)) assert result1 is not None + assert len(result1.feature_annotation) == 6 + assert len(result1.sample_metadata) == 2 assert result1.matrix.shape == (90, 2) - assert result1.to_anndata() is not None - assert result1.to_rangedsummarizedexperiment() is not None + # assert result1.to_anndata() is not None + # assert result1.to_rangedsummarizedexperiment() is not None