Skip to content

Commit

Permalink
Parse H5 backed delayed objects
Browse files Browse the repository at this point in the history
  • Loading branch information
jkanche committed Nov 1, 2024
1 parent c8e0d7c commit 7604845
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/rds2py/generics.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@
# multi assay experiment
"MultiAssayExperiment": "rds2py.read_mae.read_multi_assay_experiment",
"ExperimentList": "rds2py.read_dict.read_dict",
# delayed matrices
"H5SparseMatrix": "rds2py.read_delayed_matrix.read_hdf5_sparse",
}


Expand Down
31 changes: 31 additions & 0 deletions src/rds2py/read_delayed_matrix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""Functions and classes for parsing R delayed matrix objects from HDF5Array."""

from typing import Literal

from hdf5array import Hdf5CompressedSparseMatrix

from .generics import _dispatcher
from .rdsutils import get_class

# Module metadata: author, copyright holder and license identifier.
__author__ = "jkanche"
__copyright__ = "jkanche"
__license__ = "MIT"


def read_hdf5_sparse(robject: dict, **kwargs) -> Hdf5CompressedSparseMatrix:
    """Convert a parsed R ``H5SparseMatrix`` into a ``Hdf5CompressedSparseMatrix``.

    Args:
        robject:
            Parsed representation of the R object. Its class, as reported
            by ``get_class``, must be ``H5SparseMatrix``.

        **kwargs:
            Forwarded unchanged to every ``_dispatcher`` call.

    Returns:
        A ``Hdf5CompressedSparseMatrix`` built from the seed's file path,
        group name and dimensions. ``by_column`` is ``True`` only when the
        seed's class is ``CSC_H5SparseMatrixSeed``.

    Raises:
        RuntimeError: If ``robject`` is not an ``H5SparseMatrix``.
    """
    _cls = get_class(robject)
    if _cls != "H5SparseMatrix":
        raise RuntimeError(f"`robject` does not contain a 'H5SparseMatrix' object, contains `{_cls}`.")

    seed = robject["attributes"]["seed"]

    # The seed's class decides the orientation of the compressed layout.
    by_column = get_class(seed) == "CSC_H5SparseMatrixSeed"

    def _seed_slot(name: str):
        # Direct lookup is tried first so existing behavior is unchanged.
        # NOTE(review): the outer object keeps its slots under "attributes";
        # the seed is presumably laid out the same way, so fall back to
        # seed["attributes"][name] instead of failing with a KeyError.
        # Confirm against the parser's output for S4 seeds.
        if name in seed:
            return seed[name]
        return seed["attributes"][name]

    shape = _dispatcher(_seed_slot("dim"), **kwargs)
    # `filepath` and `group` are dispatched to sequence-like values; only the
    # first element of each is used.
    fpath = list(_dispatcher(_seed_slot("filepath"), **kwargs))[0]
    group_name = list(_dispatcher(_seed_slot("group"), **kwargs))[0]

    return Hdf5CompressedSparseMatrix(path=fpath, group_name=group_name, shape=shape, by_column=by_column)

0 comments on commit 7604845

Please sign in to comment.