Skip to content

Commit ea31ea7

Browse files
authored
Merge pull request #89 from wwu-mmll/develop
Develop
2 parents 762d713 + b3e29cd commit ea31ea7

28 files changed

+503
-84
lines changed

.github/dependabot.yml

+2
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@ version: 2
77
updates:
88
- package-ecosystem: "github-actions" # See documentation for possible values
99
directory: "/" # Location of package manifests
10+
target-branch: "develop"
1011
schedule:
1112
interval: "daily"
1213
- package-ecosystem: "pip"
1314
directory: "/"
15+
target-branch: "develop"
1416
schedule:
1517
interval: "daily"

.github/workflows/documentation_build_and_update.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ jobs:
1313
fetch-depth: 0
1414

1515
- name: Install Python
16-
uses: actions/setup-python@v4
16+
uses: actions/setup-python@v5
1717
with:
1818
python-version: '3.9'
1919

.github/workflows/documentation_deployment.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
fetch-depth: 0
1717

1818
- name: Install Python
19-
uses: actions/setup-python@v4
19+
uses: actions/setup-python@v5
2020
with:
2121
python-version: '3.9'
2222

.github/workflows/python-deploy_to_pypi.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ jobs:
1313
with:
1414
fetch-depth: 0
1515
- name: Set up Python 3.10.8
16-
uses: actions/setup-python@v4
16+
uses: actions/setup-python@v5
1717
with:
1818
python-version: 3.10.8
1919
- name: Install pypa/build

.github/workflows/python-test_and_deploy.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ jobs:
2525
steps:
2626
- uses: actions/checkout@v4
2727
- name: Set up Python 3.9
28-
uses: actions/setup-python@v4
28+
uses: actions/setup-python@v5
2929
with:
3030
python-version: 3.9
3131
- name: Install dependencies
@@ -50,7 +50,7 @@ jobs:
5050
with:
5151
fetch-depth: 0
5252
- name: Set up Python 3.9
53-
uses: actions/setup-python@v4
53+
uses: actions/setup-python@v5
5454
with:
5555
python-version: 3.9
5656
- name: Install pypa/build
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
"""
2+
Connectome-based predictive modeling
3+
4+
CPM is a method described in the following Nature Protocols article: https://www.nature.com/articles/nprot.2016.178
5+
It has been used in a number of publications to predict behavior from connectivity data.
6+
CPM works similarly to a feature selection method. First, relevant edges (connectivity values) are identified through
7+
correlation analysis. Every edge is correlated with the predictive target. Only significant edges will be used in the
8+
subsequent steps. Next, the edge values for all significant positive and for all significant negative correlations are
9+
summed to create two new features. Lastly, these two features are used as input to another classifier.
10+
11+
In this example, no connectivity data is used, but the method will still work.
12+
This example is just supposed to show how to use CPM as a feature selection and integration tool in PHOTONAI.
13+
"""
14+
15+
from sklearn.datasets import load_breast_cancer
16+
from sklearn.model_selection import KFold
17+
18+
from photonai import Hyperpipe, PipelineElement
19+
20+
21+
# Plain tabular classification data stands in for connectivity data here.
X, y = load_breast_cancer(return_X_y=True)

# Nested 5-fold cross-validation, optimised and reported on balanced accuracy.
pipe = Hyperpipe(
    "cpm_feature_selection_pipe",
    outer_cv=KFold(n_splits=5, shuffle=True, random_state=15),
    inner_cv=KFold(n_splits=5, shuffle=True, random_state=15),
    metrics=["balanced_accuracy"],
    best_config_metric="balanced_accuracy",
    project_folder='./tmp',
)

# CPM step: the correlation method and the p-value threshold are tuned.
pipe += PipelineElement(
    'CPMFeatureSelection',
    hyperparameters={
        'corr_method': ['pearson', 'spearman'],
        'p_threshold': [0.01, 0.05],
    },
)

# Final estimator working on the features produced by the CPM step.
pipe += PipelineElement('LogisticRegression')

pipe.fit(X, y)

examples/advanced/gpboost.py

+85
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# pip install gpboost -U
2+
from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
3+
from sklearn.model_selection import GroupKFold, KFold
4+
from photonai.base import Hyperpipe, PipelineElement
5+
import numpy as np
6+
import pandas as pd
7+
import gpboost as gpb
8+
# from gpboost import GPBoostRegressor
9+
10+
11+
class GPBoostDataWrapper(BaseEstimator, RegressorMixin):
    """Adapter around ``gpb.GPBoostRegressor`` with grouped random effects.

    The grouping variable is not part of ``X``; it has to be passed to both
    ``fit`` and ``predict`` as the keyword argument ``clusters``.

    The wrapped model is a regressor, so the class derives from
    ``RegressorMixin`` (not ``ClassifierMixin``) to advertise the correct
    estimator type.
    """

    def __init__(self):
        # NOTE(review): presumably tells PHOTONAI to forward the extra fit/predict
        # kwargs (here: ``clusters``) to this element -- confirm against PHOTONAI.
        self.needs_covariates = True
        # Created in ``fit``; stays None until then.
        self.gpboost = None

    @staticmethod
    def _get_clusters(kwargs):
        """Return the ``clusters`` kwarg as a ``pd.Series``.

        Raises:
            NotImplementedError: if no ``clusters`` entry was passed.
        """
        if "clusters" not in kwargs:
            raise NotImplementedError("GPBoost needs clusters")
        return pd.Series(kwargs["clusters"])

    def fit(self, X, y, **kwargs):
        """Fit a fresh ``GPBoostRegressor`` with a Gaussian grouped GP model.

        Args:
            X: feature matrix.
            y: regression targets.
            **kwargs: must contain ``clusters``, the group label of every sample.

        Returns:
            self

        Raises:
            NotImplementedError: if ``clusters`` is missing.
        """
        # Validate first so a failed call does not leave an unfitted model behind.
        clst = self._get_clusters(kwargs)
        self.gpboost = gpb.GPBoostRegressor()
        gpmodel = gpb.GPModel(likelihood="gaussian", group_data=clst)
        self.gpboost.fit(X, y, gp_model=gpmodel)
        return self

    def predict(self, X, **kwargs):
        """Predict for ``X`` and return the ``"response_mean"`` entry of the result.

        Args:
            X: feature matrix.
            **kwargs: must contain ``clusters``, the group label of every sample.

        Raises:
            NotImplementedError: if ``clusters`` is missing (same error as ``fit``).
        """
        clst = self._get_clusters(kwargs)
        preds = self.gpboost.predict(X, group_data_pred=clst)
        return preds["response_mean"]

    def save(self):
        """No-op hook; nothing is persisted and ``None`` is returned."""
        return None
37+
38+
39+
def get_gpboost_pipe(pipe_name, project_folder, split="group"):
    """Assemble the grid-search Hyperpipe used in this example.

    The pipe consists of a ``StandardScaler`` (which may also be switched off
    via ``test_disabled``) followed by the ``GPBoostDataWrapper`` estimator.

    Args:
        pipe_name: name handed to the Hyperpipe.
        project_folder: folder handed to the Hyperpipe as ``project_folder``.
        split: ``"group"`` uses ``GroupKFold`` for the outer loop, every other
            value uses plain ``KFold``; both with 10 splits.

    Returns:
        The configured Hyperpipe (not yet fitted).
    """
    outer_splitter = GroupKFold(n_splits=10) if split == "group" else KFold(n_splits=10)

    reported_metrics = ['mean_absolute_error', 'mean_squared_error',
                        'spearman_correlation', 'pearson_correlation']

    hyperpipe = Hyperpipe(
        pipe_name,
        optimizer='grid_search',
        metrics=reported_metrics,
        best_config_metric='mean_absolute_error',
        outer_cv=outer_splitter,
        inner_cv=KFold(n_splits=10),
        calculate_metrics_across_folds=True,
        use_test_set=True,
        verbosity=1,
        project_folder=project_folder,
    )

    # Transformer step
    hyperpipe += PipelineElement(
        "StandardScaler",
        hyperparameters={},
        test_disabled=True,
        with_mean=True,
        with_std=True,
    )

    # Estimator step: custom object registered under the name "GPBoost"
    hyperpipe += PipelineElement.create("GPBoost", GPBoostDataWrapper(), hyperparameters={})

    return hyperpipe
65+
66+
67+
def get_mock_data(n_samples=200, n_features=9, n_clusters=10, random_state=None):
    """Create random integer toy data with a cluster label per sample.

    Args:
        n_samples: number of rows (default 200).
        n_features: number of feature columns (default 9).
        n_clusters: cluster labels are drawn from ``0 .. n_clusters - 1``
            (default 10).
        random_state: optional seed for reproducible data. With ``None`` the
            global NumPy random state is used, exactly as before.

    Returns:
        Tuple ``(X, y, clst)``: ``X`` has shape ``(n_samples, n_features)``
        with integers in ``0 .. 9``, ``y`` is the row-wise sum of ``X`` and
        ``clst`` holds one cluster label per sample.
    """
    # The ``np.random`` module and a ``RandomState`` instance both offer ``randint``.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    X = rng.randint(10, size=(n_samples, n_features))
    y = np.sum(X, axis=1)
    clst = rng.randint(n_clusters, size=n_samples)

    return X, y, clst
74+
75+
76+
if __name__ == '__main__':
    # define project folder
    project_folder = "./tmp/gpboost_debug"

    # toy features, targets and cluster labels
    X, y, clst = get_mock_data()

    # split="random" selects the plain KFold outer loop
    my_pipe = get_gpboost_pipe(pipe_name="Test_gpboost",
                               project_folder=project_folder,
                               split="random")

    # the cluster labels travel as an extra keyword argument
    my_pipe.fit(X, y, clusters=clst)

examples/basic/classification_custom.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
my_pipe = Hyperpipe('basic_svm_pipe',
66
inner_cv=KFold(n_splits=5),
77
outer_cv=KFold(n_splits=3),
8-
optimizer='sk_opt',
8+
optimizer='random_grid_search',
99
optimizer_params={'n_configurations': 15},
1010
metrics=['accuracy', 'precision', 'recall', 'balanced_accuracy'],
1111
best_config_metric='accuracy',

examples/basic/regression.py

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from photonai import RegressionPipe
33

44
my_pipe = RegressionPipe('diabetes',
5+
best_config_metric='median_absolute_error',
56
add_default_pipeline_elements=True,
67
scaling=True,
78
imputation=False,

examples/optimizer/meta_optimizer.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
inner_cv=KFold(n_splits=5),
88
outer_cv=KFold(n_splits=3),
99
optimizer='switch',
10-
optimizer_params={'name': 'sk_opt', 'n_configurations': 50},
10+
# optimizer_params={'name': 'grid_search'},
11+
optimizer_params={'name': 'random_search', 'n_configurations': 10},
1112
metrics=['accuracy', 'precision', 'recall', 'balanced_accuracy'],
1213
best_config_metric='accuracy',
1314
project_folder='./tmp',
@@ -16,7 +17,7 @@
1617
my_pipe.add(PipelineElement('StandardScaler'))
1718

1819
my_pipe += PipelineElement('PCA',
19-
hyperparameters={'n_components': IntegerRange(10, 30)},
20+
hyperparameters={'n_components': IntegerRange(10, 30, step=5)},
2021
test_disabled=True)
2122

2223
# set up two learning algorithms in an ensemble
@@ -25,15 +26,15 @@
2526
estimator_selection += PipelineElement('RandomForestClassifier',
2627
criterion='gini',
2728
hyperparameters={'min_samples_split': IntegerRange(2, 4),
28-
'max_features': ['auto', 'sqrt', 'log2'],
29+
'max_features': ['sqrt', 'log2'],
2930
'bootstrap': [True, False]})
3031
estimator_selection += PipelineElement('SVC',
31-
hyperparameters={'C': FloatRange(0.5, 25),
32+
hyperparameters={'C': FloatRange(0.5, 25, num=10),
3233
'kernel': ['linear', 'rbf']})
3334

3435
my_pipe += estimator_selection
3536

3637
X, y = load_breast_cancer(return_X_y=True)
3738
my_pipe.fit(X, y)
3839

39-
my_pipe.results_handler.get_mean_of_best_validation_configs_per_estimator()
40+
print(my_pipe.results_handler.get_mean_of_best_validation_configs_per_estimator())

photonai/base/hyperpipe.py

+28-15
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,9 @@ def __init__(self, name: Optional[str],
297297
cache_folder: str = None,
298298
nr_of_processes: int = 1,
299299
multi_threading: bool = True,
300-
allow_multidim_targets: bool = False):
300+
allow_multidim_targets: bool = False,
301+
raise_error: bool = False,
302+
score_train: bool = True):
301303
"""
302304
Initialize the object.
303305
@@ -420,6 +422,12 @@ def __init__(self, name: Optional[str],
420422
allow_multidim_targets:
421423
Allows multidimensional targets.
422424
425+
score_train:
426+
metrics for the train-set are only calculated if score_train is true.
427+
428+
raise_error:
429+
if true, errors in the inner fold are raised instead of suppressed as warnings.
430+
423431
"""
424432

425433
self.name = re.sub(r'\W+', '', name)
@@ -514,6 +522,8 @@ def __init__(self, name: Optional[str],
514522
self.permutation_id = permutation_id
515523
self.allow_multidim_targets = allow_multidim_targets
516524
self.is_final_fit = False
525+
self.score_train = score_train
526+
self.raise_error = raise_error
517527

518528
# ====================== Random Seed ===========================
519529
self.random_state = random_seed
@@ -933,7 +943,7 @@ def _finalize_optimization(self):
933943
logger.error(str(e))
934944

935945
# get feature importances of optimum pipe
936-
logger.info("Mapping back feature importances...")
946+
# logger.info("Mapping back feature importances...")
937947
feature_importances = self.optimum_pipe.feature_importances_
938948

939949
if not feature_importances:
@@ -943,18 +953,18 @@ def _finalize_optimization(self):
943953

944954
# write backmapping file only if optimum_pipes inverse_transform works completely.
945955
# restriction: only a faulty inverse_transform is considered, missing ones are further ignored.
946-
with warnings.catch_warnings(record=True) as w:
947-
# get backmapping
948-
backmapping, _, _ = self.optimum_pipe.\
949-
inverse_transform(np.array(feature_importances).reshape(1, -1), None)
950-
951-
if not any("The inverse transformation is not possible for" in s
952-
for s in [e.message.args[0] for e in w]):
953-
# save backmapping
954-
self.results_handler.save_backmapping(
955-
filename='optimum_pipe_feature_importances_backmapped', backmapping=backmapping)
956-
else:
957-
logger.info('Could not save feature importance: backmapping NOT successful.')
956+
# with warnings.catch_warnings(record=True) as w:
957+
# # get backmapping
958+
# backmapping, _, _ = self.optimum_pipe.\
959+
# inverse_transform(np.array(feature_importances).reshape(1, -1), None)
960+
#
961+
# if not any("The inverse transformation is not possible for" in s
962+
# for s in [e.message.args[0] for e in w]):
963+
# # save backmapping
964+
# self.results_handler.save_backmapping(
965+
# filename='optimum_pipe_feature_importances_backmapped', backmapping=backmapping)
966+
# else:
967+
# logger.info('Could not save feature importance: backmapping NOT successful.')
958968

959969
# save learning curves
960970
if self.cross_validation.learning_curves:
@@ -1085,7 +1095,9 @@ def fit(self, data: np.ndarray, targets: np.ndarray, **kwargs):
10851095
cache_folder=self.cache_folder,
10861096
cache_updater=self.recursive_cache_folder_propagation,
10871097
dummy_estimator=dummy_estimator,
1088-
result_obj=outer_fold)
1098+
result_obj=outer_fold,
1099+
score_train=self.score_train,
1100+
raise_error=self.raise_error)
10891101
# 2. monitor outputs
10901102
self.results.outer_folds.append(outer_fold)
10911103

@@ -1243,6 +1255,7 @@ def train_and_get_fimps(pipeline, train_idx, test_idx, data_X, data_y, data_kwar
12431255

12441256
# get feature importances
12451257
logger.photon_system_log("Permutation Importances: Calculating performances for " + fold_str)
1258+
12461259
perm_imps = permutation_importance(pipeline, test_X, test_y, **kwargs)
12471260

12481261
# store into list

photonai/base/model_zoo.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ def set_default_pipeline(self, scaling, imputation, imputation_nan_value, featur
217217
logger.photon_system_log("---")
218218
logger.stars()
219219

220-
def fit(self, X=None, y=None):
220+
def fit(self, X=None, y=None, **kwargs):
221221
if (X is not None and self.X_csv_path is not None) or (y is not None and self.y_csv_path is not None):
222222
raise ValueError("You can either give the fit function data or the pipe definition paths "
223223
"to csv files to load data from. Not both.")
@@ -228,7 +228,7 @@ def fit(self, X=None, y=None):
228228

229229
X = X if X is not None else pd.read_csv(self.X_csv_path, delimiter=self.delimiter)
230230
y = y if y is not None else pd.read_csv(self.y_csv_path, delimiter=self.delimiter)
231-
super().fit(X, y)
231+
super().fit(X, y, **kwargs)
232232

233233

234234
class ClassificationPipe(DefaultPipeline):

photonai/base/registry/PhotonCore.json

+8
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,10 @@
295295
"sklearn.linear_model.LogisticRegression",
296296
"Estimator"
297297
],
298+
"LinearDiscriminantAnalysis": [
299+
"sklearn.discriminant_analysis.LinearDiscriminantAnalysis",
300+
"Transformer"
301+
],
298302
"PassiveAggressiveClassifier":[
299303
"sklearn.linear_model.PassiveAggressiveClassifier",
300304
"Estimator"
@@ -486,5 +490,9 @@
486490
"LocallyLinearEmbedding":[
487491
"sklearn.manifold.LocallyLinearEmbedding",
488492
"Transformer"
493+
],
494+
"CPMFeatureSelection":[
495+
"photonai.modelwrapper.cpm_feature_selection.CPMFeatureSelection",
496+
"Estimator"
489497
]
490498
}

0 commit comments

Comments
 (0)