Skip to content

Commit

Permalink
Remove the confusing expansion of feature intervals (#18)
Browse files Browse the repository at this point in the history
  • Loading branch information
jkanche authored Jan 30, 2025
1 parent d3cc814 commit e9e0eb6
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 27 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# Changelog

## Version 0.2.1
## Version 0.2.1 - 0.2.2

- The aggregate function is expected to return either a scalar value or a 1-dimensional NumPy ndarray. If the latter, users need to specify the expected dimension of the summarization. All values will be flattened eventually.
- Remove the expansion of feature intervals to conform to the output length. As a result, slices are no longer compatible with the coercions to anndata and summarized experiments, and those coercion methods have been disabled.

## Version 0.2.0

Expand Down
13 changes: 7 additions & 6 deletions src/genomicarrays/GenomicArrayDataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,22 +287,23 @@ def get_slice(

if not isinstance(feature_subset, (int, slice)):
raise TypeError("feature indices must be continous; either a 'slice' or 'int' index.")

_fsubset = self.get_feature_subset(feature_subset)
start_findex = _fsubset["genarr_feature_start_index"].astype(int).min()
end_findex = _fsubset["genarr_feature_end_index"].astype(int).max()

# expand intervals
final_rows = []
for row in _fsubset.itertuples():
for i, _ in enumerate(range(int(row.genarr_feature_start_index), int(row.genarr_feature_end_index))):
final_rows.append(row._replace(starts=i + row.starts, ends=i + row.starts + 1))
_feature_df = pd.DataFrame(final_rows)
# final_rows = []
# for row in _fsubset.itertuples():
# for i, _ in enumerate(range(int(row.genarr_feature_start_index), int(row.genarr_feature_end_index))):
# final_rows.append(row._replace(starts=i + row.starts, ends=i + row.starts + 1))
# _feature_df = pd.DataFrame(final_rows)

_msubset = self.get_matrix_subset((list(range(start_findex, end_findex)), _sample_indices))

return GenomicArrayDatasetSlice(
_ssubset,
_feature_df,
_fsubset,
_msubset,
)

Expand Down
32 changes: 16 additions & 16 deletions src/genomicarrays/GenomicArrayDatasetSlice.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,22 +51,22 @@ class GenomicArrayDatasetSlice:
## Interop
####

def to_anndata(self):
"""Convert the realized slice to :py:class:`~anndata.AnnData`."""
return anndata.AnnData(
layers={"matrix": self.matrix.transpose()},
obs=self.sample_metadata,
var=self.feature_annotation,
)

def to_rangedsummarizedexperiment(self):
"""Convert the realized slice to
:py:class:`~summarizedexperiment.RangedSummarizedExperiment.RangedSummarizedExperiment`."""
return se.RangedSummarizedExperiment(
assays={"matrix": self.matrix},
row_ranges=gr.GenomicRanges.from_pandas(self.feature_annotation),
column_data=self.sample_metadata,
)
# def to_anndata(self):
# """Convert the realized slice to :py:class:`~anndata.AnnData`."""
# return anndata.AnnData(
# layers={"matrix": self.matrix.transpose()},
# obs=self.sample_metadata,
# var=self.feature_annotation,
# )

# def to_rangedsummarizedexperiment(self):
# """Convert the realized slice to
# :py:class:`~summarizedexperiment.RangedSummarizedExperiment.RangedSummarizedExperiment`."""
# return se.RangedSummarizedExperiment(
# assays={"matrix": self.matrix},
# row_ranges=gr.GenomicRanges.from_pandas(self.feature_annotation),
# column_data=self.sample_metadata,
# )

####
## Misc methods.
Expand Down
12 changes: 8 additions & 4 deletions tests/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,12 @@ def test_query_agg():

result1 = cd.get_slice(slice(0, 5), slice(None))
assert result1 is not None
assert len(result1.feature_annotation) == 6
assert len(result1.sample_metadata) == 2
assert result1.matrix.shape == (6, 2)

assert result1.to_anndata() is not None
assert result1.to_rangedsummarizedexperiment() is not None
# assert result1.to_anndata() is not None
# assert result1.to_rangedsummarizedexperiment() is not None


def test_query_noagg():
Expand Down Expand Up @@ -165,7 +167,9 @@ def test_query_noagg():

result1 = cd.get_slice(slice(0, 5), slice(None))
assert result1 is not None
assert len(result1.feature_annotation) == 6
assert len(result1.sample_metadata) == 2
assert result1.matrix.shape == (90, 2)

assert result1.to_anndata() is not None
assert result1.to_rangedsummarizedexperiment() is not None
# assert result1.to_anndata() is not None
# assert result1.to_rangedsummarizedexperiment() is not None

0 comments on commit e9e0eb6

Please sign in to comment.