FIX: Pandas deprecations (#161)
* FIX: pd concat and update readme

* FIX: pd squeeze

* revert format changes and fix len

* fix lint

* readme clear mandatory instructions

* remove unused transformer

* update installation instructions

* update Dockerimage to new version

* add Docker image link

Co-authored-by: Lina Kim <[email protected]>

adamovanja and lina-kim committed Dec 5, 2023
1 parent 4943f83 commit 1e16652
Showing 6 changed files with 21 additions and 25 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
@@ -1,6 +1,6 @@
FROM mambaorg/micromamba
ARG MAMBA_DOCKERFILE_ACTIVATE=1
-RUN micromamba install -y -c https://packages.qiime2.org/qiime2/2023.2/tested/ \
+RUN micromamba install -y -c https://packages.qiime2.org/qiime2/2023.7/tested/ \
-c conda-forge -c bioconda -c defaults \
q2cli q2-fondue
ENV PATH /opt/conda/bin:$PATH
26 changes: 14 additions & 12 deletions README.md
@@ -23,12 +23,13 @@ conda install mamba -n base -c conda-forge
* Create and activate a conda environment with the required dependencies:
```shell
mamba create -y -n fondue \
- -c https://packages.qiime2.org/qiime2/2023.2/tested/ \
+ -c https://packages.qiime2.org/qiime2/2023.7/tested/ \
-c conda-forge -c bioconda -c defaults \
q2cli q2-fondue

conda activate fondue
```
+Now, don't forget to run [the mandatory configuration step](#mandatory-configuration-for-both-options-1-and-2)!

### Option 2: Install fondue within existing QIIME 2 environment
* Install QIIME 2 within a conda environment as described in [the official user documentation](https://docs.qiime2.org/).
@@ -40,6 +41,7 @@ mamba install -y \
-c conda-forge -c bioconda -c defaults \
q2-fondue
```
+Now, don't forget to run [the mandatory configuration step](#mandatory-configuration-for-both-options-1-and-2)!

### Mandatory configuration for both options 1 and 2
* Refresh the QIIME 2 CLI cache and see that everything worked:
@@ -65,9 +67,9 @@ vdb-config --proxy <your proxy URL> --proxy-disable no
Use containerization to integrate q2-fondue into your pipelines, or simply run reproducibly without the need for heavyweight package managers. [Read more about Docker here.](https://www.docker.com/get-started/)

* Install [Docker](https://docs.docker.com/engine/install/) with the linked instructions
-* Pull the [q2-fondue Docker image](https://hub.docker.com/layers/linathekim/q2-fondue/2023.2/images/sha256-214d0575eb4eaf435c5c4a7d29edf0fc082e47999b884b52a173f2ec469975f2?context=repo):
+* Pull the [q2-fondue Docker image](https://hub.docker.com/layers/linathekim/q2-fondue/2023.7/images/sha256-f5d26959ac035811a8f34e2a46f6cc381f9a4ce21b3604a196c1ee176ba708e7?context=repo):
```shell
-docker pull linathekim/q2-fondue:2023.2
+docker pull linathekim/q2-fondue:2023.7
```
* Within the container, refresh the QIIME 2 CLI cache to see that everything worked:
```shell
@@ -89,15 +91,15 @@ To find out which temporary directory is used by Qiime 2, you can run `echo $TMP
### Available actions
q2-fondue provides a couple of actions to fetch and manipulate nucleotide sequencing data and related metadata from SRA as well as an action to scrape run, study, BioProject, experiment and sample IDs from a Zotero web library. Below you will find a list of available actions and their short descriptions.

-| Action | Description |
-|----------------------|--------------------------------------------------------------------------|
-| `get-sequences` | Fetch sequences by IDs[*] from the SRA repository. |
-| `get-metadata` | Fetch metadata by IDs[*] from the SRA repository. |
-| `get-all` | Fetch sequences and metadata by IDs[*] from the SRA repo. |
-| `get-ids-from-query` | Find SRA run accession IDs based on a search query. |
-| `merge-metadata` | Merge several metadata files into a single metadata object. |
-| `combine-seqs` | Combine sequences from multiple artifacts into a single artifact. |
-| `scrape-collection` | Scrape Zotero collection for IDs[*] and associated DOI names.|
+| Action | Description |
+|----------------------|-------------------------------------------------------------------|
+| `get-sequences` | Fetch sequences by IDs[*] from the SRA repository. |
+| `get-metadata` | Fetch metadata by IDs[*] from the SRA repository. |
+| `get-all` | Fetch sequences and metadata by IDs[*] from the SRA repo. |
+| `get-ids-from-query` | Find SRA run accession IDs based on a search query. |
+| `merge-metadata` | Merge several metadata files into a single metadata object. |
+| `combine-seqs` | Combine sequences from multiple artifacts into a single artifact. |
+| `scrape-collection` | Scrape Zotero collection for IDs[*] and associated DOI names. |

[*]: Supported IDs include run, study, BioProject, experiment and sample IDs.

4 changes: 2 additions & 2 deletions q2_fondue/get_all.py
@@ -40,8 +40,8 @@ def get_all(
seq_single, seq_paired, failed_ids, = get_sequences(
run_ids, email, retries, n_jobs, log_level
)

-failed_ids_df = failed_ids_df.append(failed_ids.view(pd.DataFrame))
+failed_ids_df = pd.concat(
+    [failed_ids_df, failed_ids.view(pd.DataFrame)])
if failed_ids_df.shape[0] > 0:
failed_ids = Artifact.import_data('SRAFailedIDs', failed_ids_df)

3 changes: 2 additions & 1 deletion q2_fondue/tests/test_query.py
@@ -28,7 +28,8 @@ def test_query(self, mock_ids):
obs_ids, = fondue.actions.get_ids_from_query(
query, '[email protected]', 1, 'DEBUG'
)
-exp_ids = pd.DataFrame(index=pd.Index(['SRR123', 'SRR234'], name='ID'))
+exp_ids = pd.DataFrame(
+    index=pd.Index(['SRR123', 'SRR234'], name='ID'), columns=[], )

mock_ids.assert_called_once_with(
'[email protected]', 1, None, query, 'biosample', 'DEBUG'
6 changes: 0 additions & 6 deletions q2_fondue/types/_transformer.py
@@ -21,12 +21,6 @@ def _meta_fmt_to_metadata(ff):
return qiime2.Metadata(df)


-def _meta_fmt_to_series(ff):
-    with ff.open() as fh:
-        s = pd.read_csv(fh, header=0, dtype='str', squeeze=True)
-    return s


def _series_to_meta_fmt(data: pd.Series, meta_fmt):
with meta_fmt.open() as fh:
data.to_csv(fh, sep='\t', header=True, index=False)
5 changes: 2 additions & 3 deletions q2_fondue/types/tests/test_types_formats_transformers.py
@@ -178,7 +178,7 @@ def setUp(self):
ncbi_ids_path = self.get_data_path('ncbi-ids-runs.tsv')
self.ncbi_ids = NCBIAccessionIDsFormat(ncbi_ids_path, mode='r')
self.ncbi_ids_ser = pd.read_csv(
-ncbi_ids_path, header=0, dtype='str', squeeze=True)
+ncbi_ids_path, header=0, dtype='str').squeeze()
self.ncbi_ids_df = pd.read_csv(
ncbi_ids_path, sep='\t', header=0, index_col=0, dtype='str')

@@ -234,8 +234,7 @@ def test_series_to_ncbi_accession_ids(self):
self.assertIsInstance(obs, NCBIAccessionIDsFormat)

obs = pd.read_csv(
-str(obs), header=0, dtype='str', squeeze=True
-)
+str(obs), header=0, dtype='str').squeeze()
pd.testing.assert_series_equal(obs, self.ncbi_ids_ser)

def test_dataframe_to_ncbi_accession_ids(self):
