Skip to content

Commit d868720

Browse files
authored
Replace pandas with BiocFrame (#13)
The search and list operations now return their results as a `BiocFrame` instead of a pandas `DataFrame`.
1 parent 331b45e commit d868720

File tree

8 files changed

+24
-20
lines changed

8 files changed

+24
-20
lines changed

Diff for: .github/workflows/pypi-publish.yml renamed to .github/workflows/publish-pypi.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ jobs:
6161
- run: touch ./docs/_build/html/.nojekyll
6262

6363
- name: GH Pages Deployment
64-
uses: JamesIves/github-pages-deploy-action@4.1.3
64+
uses: JamesIves/github-pages-deploy-action@v4
6565
with:
6666
branch: gh-pages # The branch the action should deploy to.
6767
folder: ./docs/_build/html
@@ -74,7 +74,7 @@ jobs:
7474
LD_LIBRARY_PATH: /usr/local/lib
7575

7676
- name: Publish package
77-
uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
77+
uses: pypa/gh-action-pypi-publish@v1.12.2
7878
with:
7979
user: __token__
8080
password: ${{ secrets.PYPI_PASSWORD }}

Diff for: .github/workflows/pypi-test.yml renamed to .github/workflows/run-tests.yml

+1-2
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,13 @@ on:
44
push:
55
branches: [master]
66
pull_request:
7-
branches: [master]
87

98
jobs:
109
build:
1110
runs-on: ubuntu-latest
1211
strategy:
1312
matrix:
14-
python-version: [ "3.9", "3.10", "3.11", "3.12" ]
13+
python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13" ]
1514

1615
name: Python ${{ matrix.python-version }}
1716
steps:

Diff for: CHANGELOG.md

+5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# Changelog
22

3+
## Version 0.3.0
4+
5+
- Replace pandas with BiocFrame.
6+
- Rename GitHub Actions workflows for consistency with the rest of the packages.
7+
38
## Version 0.2.0
49

510
- chore: Remove Python 3.8 (EOL)

Diff for: setup.cfg

+1-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ install_requires =
5656
delayedarray>=0.5.1
5757
summarizedexperiment
5858
singlecellexperiment
59-
pandas
59+
biocframe
6060

6161
[options.packages.find]
6262
where = src

Diff for: src/celldex/list_references.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import sqlite3
33
from functools import lru_cache
44

5-
import pandas as pd
5+
from biocframe import BiocFrame
66
from gypsum_client import (
77
cache_directory,
88
fetch_metadata_database,
@@ -14,7 +14,7 @@
1414

1515

1616
@lru_cache
17-
def list_references(cache_dir: str = cache_directory(), overwrite: bool = False, latest: bool = True) -> pd.DataFrame:
17+
def list_references(cache_dir: str = cache_directory(), overwrite: bool = False, latest: bool = True) -> BiocFrame:
1818
"""List all available reference datasets.
1919
2020
Example:
@@ -36,7 +36,7 @@ def list_references(cache_dir: str = cache_directory(), overwrite: bool = False,
3636
Defaults to True.
3737
3838
Returns:
39-
A :py:class:`~pandas.DataFrame` where each row corresponds to a reference
39+
A :py:class:`~biocframe.BiocFrame` where each row corresponds to a reference
4040
dataset. Each row contains title and description for each reference,
4141
the number of rows and columns, the organisms and genome builds involved,
4242
whether the dataset has any pre-computed reduced dimensions, and so on.
@@ -83,7 +83,7 @@ def _format_query_results(results: list, key_names: list):
8383
def _sanitize_query_to_output(results: list, latest: bool, meta_name: str = "meta"):
8484
_all_paths = [None if "/" not in p else p.rsplit("/", 1)[0] for p in results["path"]]
8585

86-
df = pd.DataFrame(
86+
df = BiocFrame(
8787
{
8888
"name": results["asset"],
8989
"version": results["version"],
@@ -148,10 +148,10 @@ def _sanitize_query_to_output(results: list, latest: bool, meta_name: str = "met
148148
for meta in _all_metas:
149149
cursources = meta.get("sources")
150150
if cursources is None:
151-
sources.append(pd.DataFrame(columns=["provider", "id", "version"]))
151+
sources.append(BiocFrame(columns=["provider", "id", "version"]))
152152
else:
153153
sources.append(
154-
pd.DataFrame(
154+
BiocFrame(
155155
{
156156
"provider": [s.get("provider") for s in cursources],
157157
"id": [s.get("id") for s in cursources],

Diff for: src/celldex/search_references.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from functools import lru_cache
33
from typing import Union
44

5-
import pandas as pd
5+
from biocframe import BiocFrame
66
from gypsum_client import cache_directory, fetch_metadata_database
77
from gypsum_client.search_metadata import (
88
GypsumSearchClause,
@@ -22,7 +22,7 @@ def search_references(
2222
cache_dir: str = cache_directory(),
2323
overwrite: bool = False,
2424
latest: bool = True,
25-
) -> pd.DataFrame:
25+
) -> BiocFrame:
2626
"""Search for reference datasets of interest based on matching text in the associated metadata.
2727
2828
This is a wrapper around
@@ -70,7 +70,7 @@ def search_references(
7070
Defaults to True.
7171
7272
Returns:
73-
A :py:class:`~pandas.DataFrame` where each row corresponds to
73+
A :py:class:`~biocframe.BiocFrame` where each row corresponds to
7474
a dataset, containing various columns of metadata.
7575
Some columns may be lists to capture 1:many mappings.
7676
"""

Diff for: tests/test_list_refs.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import tempfile
22

3-
import pandas as pd
3+
from biocframe import BiocFrame
44
from celldex import list_references, list_versions, fetch_latest_version
55

66
__author__ = "Jayaram Kancherla"
@@ -11,7 +11,7 @@
1111
def test_list_references():
1212
refs = list_references(cache_dir=tempfile.mkdtemp())
1313

14-
assert isinstance(refs, pd.DataFrame)
14+
assert isinstance(refs, BiocFrame)
1515
assert len(refs) >= 7
1616

1717

Diff for: tests/test_search_refs.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import pandas as pd
1+
from biocframe import BiocFrame
22
from gypsum_client import define_text_query
33
from celldex import search_references
44

@@ -10,12 +10,12 @@
1010
def test_search_references():
1111
res = search_references("human")
1212
assert len(res) > 3
13-
assert isinstance(res, pd.DataFrame)
13+
assert isinstance(res, BiocFrame)
1414

1515
res = search_references(define_text_query("Immun%", partial=True))
16-
assert isinstance(res, pd.DataFrame)
16+
assert isinstance(res, BiocFrame)
1717
assert len(res) > 0
1818

1919
res = search_references(define_text_query("10090", field="taxonomy_id"))
20-
assert isinstance(res, pd.DataFrame)
20+
assert isinstance(res, BiocFrame)
2121
assert len(res) > 0

0 commit comments

Comments
 (0)