Genetic feature selection module for scikit-learn
Genetic algorithms mimic the process of natural selection to search for optimal values of a function.
pip install git+https://github.com/netzkontast/sklearn-genetic.git
- Python >= 2.7
- scikit-learn >= 0.18
- DEAP >= 1.0.2
from __future__ import print_function
import numpy as np
from sklearn import datasets, linear_model
from genetic_selection import GeneticSelectionCV
def main():
    """Run the feature-selection example on a noise-augmented iris dataset.

    Loads the iris dataset, appends 20 columns of uniform random noise to
    its features, fits a ``GeneticSelectionCV`` selector wrapping a
    logistic-regression estimator, and prints the selector's ``support_``
    attribute.
    """
    dataset = datasets.load_iris()

    # Extra columns of random values in [0, 0.1), generated independently
    # of the target, stacked to the right of the real iris features.
    n_samples = len(dataset.data)
    noise = np.random.uniform(0, 0.1, size=(n_samples, 20))
    features = np.hstack((dataset.data, noise))
    target = dataset.target

    # Keyword settings forwarded unchanged to GeneticSelectionCV.
    ga_settings = {
        "cv": 5,
        "verbose": 1,
        "scoring": "accuracy",
        "n_population": 50,
        "crossover_proba": 0.5,
        "mutation_proba": 0.2,
        "n_generations": 40,
        "crossover_independent_proba": 0.5,
        "mutation_independent_proba": 0.05,
        "tournament_size": 3,
        "caching": True,
        "n_jobs": -1,
    }
    base_model = linear_model.LogisticRegression()
    searcher = GeneticSelectionCV(base_model, **ga_settings)

    # fit() returns the object whose support_ is reported below.
    fitted = searcher.fit(features, target)
    print(fitted.support_)
# Run the example only when this file is executed as a script.
if __name__ == "__main__":
    main()