Skip to content

Commit f0497e0

Browse files
committed
Merge branch 'develop'
2 parents bc337ac + ca7100f commit f0497e0

28 files changed

+209
-98
lines changed

.github/workflows/python-test_and_deploy.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ jobs:
3737
pip install tensorflow pytest pytest-cov coveralls -r photonai.egg-info/requires.txt -r photonai/optimization/smac/requirements.txt -r photonai/optimization/nevergrad/requirements.txt
3838
- name: Test with pytest
3939
run: |
40-
PYTHONPATH=./ pytest ./test --cov=./photonai
40+
PYTHONPATH=./ pytest ./test --cov=./photonai --tb=long
4141
- name: Coveralls
4242
run: coveralls
4343
env:

README.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,12 @@
1010

1111
#### PHOTONAI is a high-level Python API for designing and optimizing machine learning pipelines.
1212

13-
We create a system in which you can easily select and combine both pre-processing and learning algorithms from
13+
We've created a system in which you can easily select and combine both pre-processing and learning algorithms from
1414
state-of-the-art machine learning toolboxes,
1515
and arrange them in simple or parallel pipeline data streams.
1616

1717
In addition, you can parametrize your training and testing
18-
workflow choosing cross-validation schemas, performance metrics and hyperparameter
18+
workflow choosing cross-validation schemes, performance metrics and hyperparameter
1919
optimization metrics from a list of pre-registered options.
2020

2121
Importantly, you can integrate custom solutions into your data processing pipeline,
@@ -25,7 +25,7 @@ state-of-the-art machine learning toolboxes,
2525
For a detailed description,
2626
__[visit our website and read the documentation](https://www.photon-ai.com)__
2727

28-
or you can read a prolonged introduction on [Arxiv](https://arxiv.org/abs/2002.05426)
28+
or you can read our paper in [PLOS ONE](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0254062)
2929

3030

3131

+58
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
import os
2+
3+
from sklearn.datasets import make_classification
4+
from sklearn.model_selection import ShuffleSplit
5+
from sklearn.metrics.pairwise import rbf_kernel
6+
from joblib import Memory
7+
8+
from photonai.base import Hyperpipe, PipelineElement
9+
from photonai.optimization import FloatRange
10+
11+
12+
cache_dir = './tmp/kernel_cache'
13+
os.makedirs(cache_dir, exist_ok=True)
14+
memory = Memory(location=cache_dir, verbose=0)  # `cachedir` was deprecated in joblib 0.12 and later removed; `location` is the supported keyword
15+
16+
17+
@memory.cache
18+
def cached_rbf(X, Y):
19+
return rbf_kernel(X, Y)
20+
21+
22+
# create toy data
23+
n_features = 10000
24+
n_samples = 1000
25+
n_informative = 10
26+
X, y = make_classification(n_samples, n_features, n_informative=n_informative)
27+
gamma = 1 / n_features
28+
29+
"""
30+
Especially with large datasets, it is unnecessary to recompute the kernel for every hyperparameter configuration.
31+
For that reason, you can pass a cached kernel function that will only recompute the kernel if the input data changes.
32+
If you don't want to cache the kernel, it still decreases the computation time by orders of magnitude when passing the kernel
33+
as a dedicated function. See these links for details:
34+
https://github.com/scikit-learn/scikit-learn/issues/21410
35+
https://stackoverflow.com/questions/69680420/using-a-custom-rbf-kernel-function-for-sklearns-svc-is-way-faster-than-built-in
36+
"""
37+
#kernel = 'rbf'
38+
#kernel = rbf_kernel
39+
kernel = cached_rbf
40+
41+
pipe = Hyperpipe('svm_with_custom_kernel',
42+
inner_cv=ShuffleSplit(n_splits=1, test_size=0.2),
43+
outer_cv=ShuffleSplit(n_splits=1, test_size=0.2),
44+
optimizer='sk_opt',
45+
optimizer_params={'n_configurations': 15},
46+
metrics=['accuracy', 'precision', 'recall', 'balanced_accuracy'],
47+
best_config_metric='accuracy',
48+
project_folder='./tmp',
49+
verbosity=1)
50+
51+
pipe += PipelineElement('StandardScaler')
52+
53+
pipe += PipelineElement('SVC',
54+
hyperparameters={'C': FloatRange(1e-6, 1e6)},
55+
gamma=gamma, kernel=kernel)
56+
57+
pipe.fit(X, y)
58+

photonai/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,6 @@
1313
1414
"""
1515

16-
__version__ = '2.1.0'
16+
__version__ = '2.2.0'
1717

1818
# __all__ = ( )

photonai/base/hyperpipe.py

+17-21
Original file line numberDiff line numberDiff line change
@@ -346,10 +346,10 @@ def __init__(self, name: Optional[str],
346346
The metric that should be maximized or minimized in order to choose
347347
the best hyperparameter configuration.
348348
349-
eval_final_performance [bool, default=True]:
349+
eval_final_performance:
350350
DEPRECATED! Use "use_test_set" instead!
351351
352-
use_test_set [bool, default=True]:
352+
use_test_set:
353353
If the metrics should be calculated for the test set,
354354
otherwise the test set is separated but not used.
355355
@@ -603,6 +603,11 @@ def input_data_sanity_checks(self, data, targets, **kwargs):
603603
"PHOTONAI erases every data item that has a Nan Target".format(str(nr_of_nans)))
604604
self.X = self.X[~nans_in_y]
605605
self.y = self.y[~nans_in_y]
606+
new_kwargs = dict()
607+
for name, element_list in kwargs.items():
608+
new_kwargs[name] = element_list[~nans_in_y]
609+
self.kwargs = new_kwargs
610+
606611
except Exception as e:
607612
# This is only for convenience so if it fails then never mind
608613
logger.error("Removing Nans in target vector failed: " + str(e))
@@ -637,7 +642,9 @@ def disable_multiprocessing_recursively(pipe):
637642
if hasattr(pipe, 'nr_of_processes'):
638643
pipe.nr_of_processes = 1
639644
for child in pipe.elements:
640-
if hasattr(child, 'base_element'):
645+
if isinstance(child, Branch):
646+
Hyperpipe.disable_multiprocessing_recursively(child)
647+
elif hasattr(child, 'base_element'):
641648
Hyperpipe.disable_multiprocessing_recursively(child.base_element)
642649
elif isinstance(pipe, PhotonPipeline):
643650
for name, child in pipe.named_steps.items():
@@ -866,7 +873,8 @@ def _finalize_optimization(self):
866873
self.optimum_pipe.fit(self.data.X, self.data.y, **self.data.kwargs)
867874

868875
# Before saving the optimum pipe, add preprocessing without multiprocessing
869-
self.optimum_pipe.add_preprocessing(self.disable_multiprocessing_recursively(self.preprocessing))
876+
self.disable_multiprocessing_recursively(self.preprocessing)
877+
self.optimum_pipe.add_preprocessing(self.preprocessing)
870878

871879
# Now truly set to no caching (including single_subject_caching)
872880
self.recursive_cache_folder_propagation(self.optimum_pipe, None, None)
@@ -939,11 +947,8 @@ def _prepare_pipeline(self):
939947
# ===================================================================
940948

941949
@staticmethod
942-
def fit_outer_folds(outer_fold_computer, X, y, kwargs, cache_folder):
943-
try:
944-
outer_fold_computer.fit(X, y, **kwargs)
945-
finally:
946-
CacheManager.clear_cache_files(cache_folder)
950+
def fit_outer_folds(outer_fold_computer, X, y, kwargs):
951+
outer_fold_computer.fit(X, y, **kwargs)
947952
return
948953

949954
def fit(self, data: np.ndarray, targets: np.ndarray, **kwargs):
@@ -1038,8 +1043,7 @@ def fit(self, data: np.ndarray, targets: np.ndarray, **kwargs):
10381043
result = dask.delayed(Hyperpipe.fit_outer_folds)(outer_fold_computer,
10391044
self.data.X,
10401045
self.data.y,
1041-
self.data.kwargs,
1042-
self.cache_folder)
1046+
self.data.kwargs)
10431047
delayed_jobs.append(result)
10441048
else:
10451049
try:
@@ -1058,7 +1062,8 @@ def fit(self, data: np.ndarray, targets: np.ndarray, **kwargs):
10581062
# evaluate hyperparameter optimization results for best config
10591063
self._finalize_optimization()
10601064

1061-
# clear complete cache ?
1065+
# clear complete cache ? use self.cache_folder to delete all subfolders within the parent cache folder
1066+
# directory
10621067
CacheManager.clear_cache_files(self.cache_folder, force_all=True)
10631068

10641069
###############################################################################################
@@ -1247,15 +1252,6 @@ def get_permutation_feature_importances(self, **kwargs):
12471252
Returns mean of "importances_mean" and of "importances_std" of all outer folds.
12481253
12491254
Parameters:
1250-
X_val:
1251-
The array-like data with shape=[M, D],
1252-
where M is the number of samples and D is the number
1253-
of features. D must correspond to the number
1254-
of trained dimensions of the fit method.
1255-
1256-
y_val:
1257-
The array-like true targets.
1258-
12591255
**kwargs:
12601256
Keyword arguments, passed to sklearn.permutation_importance.
12611257

photonai/helper/helper.py

+14
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,20 @@ def print_double_metrics(metric_dict_train, metric_dict_test, photon_system_log=
226226
logger.debug(t)
227227

228228

229+
def print_outer_folds(metric_list, outer_fold_list, photon_system_log=True, summary=False):
230+
t = PrettyTable(["fold #"] + [metric for metric in metric_list] + ["Best Hyperparameter Config"])
231+
for outer_fold in outer_fold_list:
232+
nr_str = str(outer_fold.fold_nr)
233+
if outer_fold.owns_best_config:
234+
nr_str += "*"
235+
t.add_row([nr_str] +
236+
["%.4f" % outer_fold.best_config.best_config_score.validation.metrics[m] for m in metric_list] +
237+
[outer_fold.best_config.human_readable_config])
238+
if summary:
239+
return t
240+
if photon_system_log:
241+
logger.photon_system_log(t)
242+
229243
def print_estimator_metrics(estimator_performances, metric_list, summary=False):
230244
t = PrettyTable(['Estimator'] + metric_list)
231245
for estimator_name, estimator_values in estimator_performances.items():

photonai/modelwrapper/keras_base_models.py

+10-10
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22
import numpy as np
33
import keras
44
from typing import Union
5-
from keras.utils import to_categorical
5+
from keras.utils.all_utils import to_categorical
66
from keras.layers import Dropout, Dense
7-
from keras.layers.normalization import BatchNormalization
7+
from keras.layers import BatchNormalization
88
from keras.models import Sequential
9-
from keras.optimizers import Optimizer, Adam, RMSprop, Adadelta, Adagrad, Adamax, Nadam, SGD
9+
from keras.optimizers import Optimizer, adam_v2, rmsprop_v2, adadelta_v2, adagrad_v2, adamax_v2, nadam_v2, gradient_descent_v2
1010
from keras.activations import softmax, softplus, selu, sigmoid, softsign, hard_sigmoid, elu, relu, tanh, \
1111
linear, exponential
1212
from sklearn.base import ClassifierMixin, RegressorMixin
@@ -15,13 +15,13 @@
1515
from photonai.modelwrapper.keras_base_estimator import KerasBaseEstimator
1616

1717
__supported_optimizers__ = {
18-
'sgd': SGD,
19-
'rmsprop': RMSprop,
20-
'adagrad': Adagrad,
21-
'adadelta': Adadelta,
22-
'adam': Adam,
23-
'adamax': Adamax,
24-
'nadam': Nadam
18+
'sgd': gradient_descent_v2.SGD,
19+
'rmsprop': rmsprop_v2.RMSprop,
20+
'adagrad': adagrad_v2.Adagrad,
21+
'adadelta': adadelta_v2.Adadelta,
22+
'adam': adam_v2.Adam,
23+
'adamax': adamax_v2.Adamax,
24+
'nadam': nadam_v2.Nadam
2525
}
2626
__supported_activations__ = {
2727
'softmax': softmax,

photonai/optimization/optimization_info.py

+1
Original file line numberDiff line numberDiff line change
@@ -120,5 +120,6 @@ def get_optimum_config_outer_folds(self, outer_folds):
120120
# min metric
121121
best_config_metric_nr = np.argmin(list_of_scores)
122122

123+
outer_folds[best_config_metric_nr].owns_best_config = True
123124
best_config = outer_folds[best_config_metric_nr].best_config
124125
return best_config
+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# please install SWIG -> have a look at https://github.com/automl/SMAC3
2-
smac
2+
smac>=1.0.0
33
emcee
44
pyDOE

photonai/optimization/smac/smac.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@
1010
from smac.configspace import UniformFloatHyperparameter, UniformIntegerHyperparameter, CategoricalHyperparameter, \
1111
ConfigurationSpace, Configuration, InCondition, Constant
1212
from smac.scenario.scenario import Scenario
13-
from smac.facade.smac_bo_facade import SMAC4BO
13+
from smac.facade.smac_bb_facade import SMAC4BB as SMAC4BO
1414
from smac.facade.smac_hpo_facade import SMAC4HPO
1515
from smac.facade.smac_ac_facade import SMAC4AC
16-
from smac.facade.smac_bohb_facade import BOHB4HPO
16+
from smac.facade.smac_mf_facade import SMAC4MF as BOHB4HPO
1717
__found__ = True
1818
except (ModuleNotFoundError, ImportError):
1919
__found__ = False

photonai/processing/cross_validation.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,9 @@ class StratifiedKFoldRegression(_BaseKFold):
5555
"""
5656

5757
def __init__(self, n_splits=3, shuffle=False, random_state=None):
58-
super(StratifiedKFoldRegression, self).__init__(n_splits, shuffle, random_state)
58+
super(StratifiedKFoldRegression, self).__init__(n_splits=n_splits,
59+
shuffle=shuffle,
60+
random_state=random_state)
5961

6062
def _make_test_folds(self, X, y=None):
6163
rng = self.random_state

0 commit comments

Comments
 (0)