Skip to content

Commit

Permalink
Merge branch 'main' into update_pkg
Browse files Browse the repository at this point in the history
  • Loading branch information
Henry Webel authored Jun 6, 2024
2 parents 0d3b503 + 4c8e16d commit e0528fb
Show file tree
Hide file tree
Showing 7 changed files with 81 additions and 65 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/colab.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: Test that tutorial runs on latest colab image
on:
push:
schedule:
- cron: '0 2 * * 3'
- cron: '0 2 3 * *'

jobs:
test:
Expand Down
8 changes: 7 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
{
"python.formatting.provider": "yapf"
"editor.defaultFormatter": "eeyore.yapf",
"python.testing.pytestArgs": [
"test"
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,

}
46 changes: 45 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,48 @@
[project]
name = "njab"
authors = [{ name = "Henry Webel", email = "[email protected]" }]
description = "not Just Another Biomarker"
readme = "README.md"
requires-python = ">=3.8"
keywords = ["bioinformatics", "biomarker"]
license = { file = "LICENSE" }
classifiers = [
"License :: OSI Approved :: MIT License",
"Intended Audience :: Healthcare Industry",
"Intended Audience :: Science/Research",
"Programming Language :: Python :: 3",
"Topic :: Scientific/Engineering :: Bio-Informatics",
]
dependencies = [
"omegaconf",
"lifelines",
"numpy",
"pandas",
"scikit-learn",
"statsmodels",
"umap-learn",
"matplotlib",
"mrmr_selection",
"pingouin",
"seaborn",
]
dynamic = ["version"]

[project.optional-dependencies]
docs = [
"sphinx",
"sphinx-book-theme",
"myst-nb",
"ipywidgets",
"sphinx-new-tab-link!=0.2.2",
]

[project.urls]
"Bug Tracker" = "https://github.com/RasmussenLab/njab/issues"
"Homepage" = "https://github.com/RasmussenLab/njab"

[build-system]
requires = ["setuptools>=42", "wheel"]
build-backend = "setuptools.build_meta"
requires = ["setuptools>=64", "setuptools_scm>=8"]

[tool.setuptools_scm]
49 changes: 0 additions & 49 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,52 +1,3 @@
[metadata]
name = njab
version = 0.0.4
description = not Just Another Biomarker
long_description = file: README.md
long_description_content_type = text/markdown
author = Henry Webel
author_email = "Henry Webel" <[email protected]>
license = MIT
license_files = LICENSE
classifiers =
License :: OSI Approved :: MIT License
Intended Audience :: Healthcare Industry
Intended Audience :: Science/Research
Programming Language :: Python :: 3
Topic :: Scientific/Engineering :: Bio-Informatics

[options]
requires_python = ">=3.8"
package_dir =
=src
packages = find:
include_package_data = False
install_requires =
omegaconf
lifelines
numpy
pandas
scikit-learn
statsmodels
umap-learn
matplotlib
mrmr_selection
pingouin
seaborn

[options.extras_require]
docs =
sphinx
sphinx-book-theme
myst-nb
ipywidgets
sphinx-new-tab-link!=0.2.2

[options.packages.find]
where = src
exclude =
test*

[yapf]
based_on_style = pep8

Expand Down
28 changes: 16 additions & 12 deletions src/njab/pandas/__init__.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,26 @@
import logging
import typing

import omegaconf
import pandas as pd
import pandas.io.formats.format as pf

import omegaconf

logger = logging.getLogger(__name__)


def set_pandas_options() -> None:
def set_pandas_options(max_columns: int = 100,
max_row: int = 30,
min_row: int = 20,
float_format='{:,.3f}') -> None:
"""Update default pandas options for better display."""
pd.options.display.max_columns = 100
pd.options.display.max_rows = 30
pd.options.display.min_rows = 20
pd.options.display.float_format = '{:,.3f}'.format
pd.options.display.max_columns = max_columns
pd.options.display.max_rows = max_row
pd.options.display.min_rows = min_row
set_pandas_number_formatting(float_format=float_format)


def set_pandas_number_formatting(float_format='{:,.3f}') -> None:
pd.options.display.float_format = float_format.format
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.describe_option.html#pandas.describe_option
pd.options.styler.format.thousands = ','
# # https://github.com/pandas-dev/pandas/blob/main/pandas/io/formats/format.py#L1475
Expand Down Expand Up @@ -114,10 +120,8 @@ def combine_value_counts(X: pd.DataFrame, dropna=True) -> pd.DataFrame:
pandas.DataFrame
DataFrame of combined value counts.
"""
"""
"""
_df = pd.DataFrame()
freq_targets = list()
for col in X.columns:
_df = _df.join(X[col].value_counts(dropna=dropna), how='outer')
freq_targets = _df.sort_index()
freq_targets.append(X[col].value_counts(dropna=dropna).rename(col))
freq_targets = pd.concat(freq_targets, axis=1, sort=True)
return freq_targets
5 changes: 4 additions & 1 deletion src/njab/stats/ancova.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""Analysis of covariance using pingouin and statsmodels."""
from __future__ import annotations

import numpy as np
import pandas as pd

import pingouin as pg
import statsmodels

Expand Down Expand Up @@ -52,6 +52,7 @@ def ancova_pg(df_long: pd.DataFrame,
# num_covar = len(covar)

for feat_name, data_feat in df_long.groupby(feat_col):
# ? drop duplicated columns in long data format?
ancova = pg.ancova(data=data_feat, dv=dv, between=between, covar=covar)
ancova[feat_col] = feat_name
scores.append(ancova)
Expand Down Expand Up @@ -137,6 +138,8 @@ def ancova(self, random_seed=123):

scores = self.get_scores()
scores = filter_residuals_from_scores(scores)
# drop nan values (due to multicollinearity of features - i.e. duplicated features)
scores = scores.dropna()
scores = add_fdr_scores(scores, random_seed=random_seed)
self.scores = scores
return scores.set_index('Source', append=True)
Expand Down
8 changes: 8 additions & 0 deletions test/test_pandas.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
import pandas as pd

import njab


def test_thousands_display():
    """Integers in a Series repr carry a thousands separator.

    Applies the package's pandas display options and then checks that the
    characters at positions 4 to 12 of the string form of a one-element
    integer Series read ``1,000,000``.
    """
    njab.pandas.set_pandas_options()
    rendered = str(pd.Series([1_000_000]))
    # NOTE(review): the fixed slice presumably skips the index label and its
    # padding in the repr -- confirm if the pandas repr layout ever changes.
    number_part = rendered[4:13]
    assert number_part == '1,000,000'


def test_combine_value_counts():
    """Per-column value counts are combined into one frame keyed by column.

    Builds a two-column frame, runs ``njab.pandas.combine_value_counts`` on
    it, and compares the ``to_dict()`` form of the result against the
    hand-counted frequencies of each value in each column.
    """
    frame = pd.DataFrame({
        'a': [1, 2, 2, 2, 3, 3, 3],
        'b': [1, 1, 1, 2, 2, 3, 3],
    })
    # Column -> {value: number of occurrences of that value in the column}.
    expected = {
        'a': {1: 1, 2: 3, 3: 3},
        'b': {1: 3, 2: 2, 3: 2},
    }
    observed = njab.pandas.combine_value_counts(frame).to_dict()
    assert observed == expected

0 comments on commit e0528fb

Please sign in to comment.