diff --git a/project/00_0_0_lftp_upload_commands.ipynb b/project/00_0_0_lftp_upload_commands.ipynb index 1d9ae4337..3ea94597a 100644 --- a/project/00_0_0_lftp_upload_commands.ipynb +++ b/project/00_0_0_lftp_upload_commands.ipynb @@ -113,7 +113,7 @@ "metadata": {}, "outputs": [], "source": [ - "meta_stats = df_meta.describe(include='all', datetime_is_numeric=True)\n", + "meta_stats = df_meta.describe(include='all')\n", "meta_stats.T" ] }, diff --git a/project/00_0_0_lftp_upload_commands.py b/project/00_0_0_lftp_upload_commands.py index 17cc26be2..03b71f264 100644 --- a/project/00_0_0_lftp_upload_commands.py +++ b/project/00_0_0_lftp_upload_commands.py @@ -71,7 +71,7 @@ def rename(fname, new_sample_id, new_folder=None, ext=None): # %% -meta_stats = df_meta.describe(include='all', datetime_is_numeric=True) +meta_stats = df_meta.describe(include='all') meta_stats.T # %% [markdown] diff --git a/project/00_0_hela_metadata_rawfiles.ipynb b/project/00_0_hela_metadata_rawfiles.ipynb index 3971259e4..b4ca77ca5 100644 --- a/project/00_0_hela_metadata_rawfiles.ipynb +++ b/project/00_0_hela_metadata_rawfiles.ipynb @@ -101,7 +101,7 @@ "metadata": {}, "outputs": [], "source": [ - "meta_stats = df_meta_rawfiles.describe(include='all', datetime_is_numeric=True)\n", + "meta_stats = df_meta_rawfiles.describe(include='all')\n", "meta_stats.T" ] }, diff --git a/project/00_0_hela_metadata_rawfiles.py b/project/00_0_hela_metadata_rawfiles.py index 03a501db0..2241611e8 100644 --- a/project/00_0_hela_metadata_rawfiles.py +++ b/project/00_0_hela_metadata_rawfiles.py @@ -58,7 +58,7 @@ print(msg) # %% -meta_stats = df_meta_rawfiles.describe(include='all', datetime_is_numeric=True) +meta_stats = df_meta_rawfiles.describe(include='all') meta_stats.T # %% [markdown] diff --git a/project/00_3_1_pride_metadata_analysis.ipynb b/project/00_3_1_pride_metadata_analysis.ipynb index d68e970a9..54452b7d9 100644 --- a/project/00_3_1_pride_metadata_analysis.ipynb +++ b/project/00_3_1_pride_metadata_analysis.ipynb @@ -177,7 +177,7 @@ "metadata": {}, "outputs": [], "source": [ - "meta_stats = df_meta.describe(include='all', datetime_is_numeric=True)\n", + "meta_stats = df_meta.describe(include='all')\n", "meta_stats.T.to_excel(excel_writer, sheet_name='des_stats', **writer_args)\n", "\n", "view = meta_stats.loc[:, (meta_stats.loc['unique'] > 1)\n", diff --git a/project/00_3_1_pride_metadata_analysis.py b/project/00_3_1_pride_metadata_analysis.py index c6ad55d12..105a82d7a 100644 --- a/project/00_3_1_pride_metadata_analysis.py +++ b/project/00_3_1_pride_metadata_analysis.py @@ -103,7 +103,7 @@ # ## Varying data between runs # %% -meta_stats = df_meta.describe(include='all', datetime_is_numeric=True) +meta_stats = df_meta.describe(include='all') meta_stats.T.to_excel(excel_writer, sheet_name='des_stats', **writer_args) view = meta_stats.loc[:, (meta_stats.loc['unique'] > 1) diff --git a/project/01_0_split_data.ipynb b/project/01_0_split_data.ipynb index 32cdb3713..ed62a0e31 100644 --- a/project/01_0_split_data.ipynb +++ b/project/01_0_split_data.ipynb @@ -327,8 +327,7 @@ "metadata": {}, "outputs": [], "source": [ - "df_meta.describe(datetime_is_numeric=True,\n", - " percentiles=np.linspace(0.05, 0.95, 10))" + "df_meta.describe(percentiles=np.linspace(0.05, 0.95, 10))" ] }, { @@ -373,7 +372,7 @@ "metadata": {}, "outputs": [], "source": [ - "meta_stats = df_meta.describe(include='all', datetime_is_numeric=True)\n", + "meta_stats = df_meta.describe(include='all')\n", "meta_stats" ] }, @@ -697,7 +696,7 @@ "metadata": {}, "outputs": [], "source": [ - "pcs.describe(include='all', datetime_is_numeric=True).T" + "pcs.describe(include='all').T" ] }, { diff --git a/project/01_0_split_data.py b/project/01_0_split_data.py index 6b5e19770..fc41e698f 100644 --- a/project/01_0_split_data.py +++ b/project/01_0_split_data.py @@ -227,8 +227,7 @@ def join_as_str(seq): # %% -df_meta.describe(datetime_is_numeric=True, - percentiles=np.linspace(0.05, 0.95, 10)) +df_meta.describe(percentiles=np.linspace(0.05, 0.95, 10)) # %% [markdown] # select samples with a minimum retention time @@ -251,7 +250,7 @@ def join_as_str(seq): df_meta = df_meta.sort_values(params.meta_date_col) # %% -meta_stats = df_meta.describe(include='all', datetime_is_numeric=True) +meta_stats = df_meta.describe(include='all') meta_stats # %% [markdown] @@ -434,7 +433,7 @@ def join_as_str(seq): pcs # %% -pcs.describe(include='all', datetime_is_numeric=True).T +pcs.describe(include='all').T # %% if params.meta_cat_col: diff --git a/setup.cfg b/setup.cfg index 4e33952ce..9ac3edd9e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -22,9 +22,9 @@ include_package_data = True install_requires = numpy matplotlib - pandas<2 + pandas plotly - torch<2 + torch scikit-learn>=1.0 scipy seaborn diff --git a/vaep/pandas/__init__.py b/vaep/pandas/__init__.py index b10ce763a..489765682 100644 --- a/vaep/pandas/__init__.py +++ b/vaep/pandas/__init__.py @@ -73,7 +73,7 @@ def unique_cols(s: pd.Series) -> bool: def show_columns_with_variation(df: pd.DataFrame) -> pd.DataFrame: - df_describe = df.describe(include='all', datetime_is_numeric=True) + df_describe = df.describe(include='all') col_mask = (df_describe.loc['unique'] > 1) | ( df_describe.loc['std'] > 0.01) return df.loc[:, col_mask]