Skip to content

Commit

Permalink
Added liblinear solver in the LogisticRegression initialization. Adde…
Browse files Browse the repository at this point in the history
…d training accuracy and testing accuracy prints in the function __VotingClassifier__(). Called ensemble.__VotingClassifier__() in the main function. And finally, added a requirements.txt file, as I tested it with the latest scikit-learn version.

Signed-off-by: Bikash Karmokar <[email protected]>
  • Loading branch information
bikashkarmokar committed Nov 18, 2021
1 parent ec2cbcd commit da20b08
Show file tree
Hide file tree
Showing 5 changed files with 209 additions and 58 deletions.
141 changes: 141 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# pycharm
.idea
41 changes: 21 additions & 20 deletions blending.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression


class Ensemble:
def __init__(self):
self.x_train = None
Expand All @@ -19,31 +20,32 @@ def __init__(self):
def load_data(self):
    """Load the breast-cancer dataset and split it into train, validation and test sets.

    Populates ``self.x_train``/``self.y_train``, ``self.x_val``/``self.y_val``
    and ``self.x_test``/``self.y_test``. Both splits use ``random_state=23``.
    """
    x, y = load_breast_cancer(return_X_y=True)

    # First split: hold out 15% of all samples as the test set.
    self.x_train, self.x_test, self.y_train, self.y_test = train_test_split(
        x, y, test_size=0.15, random_state=23)

    # Second split: hold out 30% of the remaining training samples as the
    # validation set. This must run exactly once; repeating it would shrink
    # the training set again and overwrite the validation set.
    self.x_train, self.x_val, self.y_train, self.y_val = train_test_split(
        self.x_train, self.y_train, test_size=0.3, random_state=23)

def BlendingClassifier(self):

# Define weak learners
weak_learners = [('dt', DecisionTreeClassifier()),
('knn', KNeighborsClassifier()),
('rf', RandomForestClassifier()),
('gb', GradientBoostingClassifier()),
('gn', GaussianNB())]
# Finaler learner or meta model
('knn', KNeighborsClassifier()),
('rf', RandomForestClassifier()),
('gb', GradientBoostingClassifier()),
('gn', GaussianNB())]

# Final learner or meta model
final_learner = LogisticRegression()

train_meta_model = None
test_meta_model = None

# Start stacking
for clf_id, clf in weak_learners:

# Hold-out (validation) and test predictions for each classifier
val_predictions, test_predictions = self.train_level_0(clf)

# Stack predictions which will form
# the inputa data for the data model
# the input data for the data model
if isinstance(train_meta_model, np.ndarray):
train_meta_model = np.vstack((train_meta_model, val_predictions))
else:
Expand All @@ -55,25 +57,24 @@ def BlendingClassifier(self):
test_meta_model = np.vstack((test_meta_model, test_predictions))
else:
test_meta_model = test_predictions

# Transpose train_meta_model
train_meta_model = train_meta_model.T

# Transpose test_meta_model
test_meta_model = test_meta_model.T

# Training level 1
self.train_level_1(final_learner, train_meta_model, test_meta_model)


def train_level_0(self, clf):
# Train with base x_train
clf.fit(self.x_train, self.y_train)

# Generate predictions for the holdout set (validation)
# These predictions will build the input for the meta model
val_predictions = clf.predict(self.x_val)

# Generate predictions for original test set
# These predictions will be used to test the meta model
test_predictions = clf.predict(self.x_test)
Expand All @@ -83,13 +84,13 @@ def train_level_0(self, clf):
def train_level_1(self, final_learner, train_meta_model, test_meta_model):
    """Fit the meta model and print its train and test accuracy.

    Args:
        final_learner: Estimator exposing ``fit(X, y)`` and ``score(X, y)``.
        train_meta_model: Features used to fit the meta model; fitted and
            scored against ``self.y_val``.
        test_meta_model: Features used to evaluate the meta model; scored
            against ``self.y_test``.
    """
    # Training is carried out with the final learner (meta model).
    final_learner.fit(train_meta_model, self.y_val)

    # Report each accuracy exactly once.
    print(f"Train accuracy: {final_learner.score(train_meta_model, self.y_val)}")
    print(f"Test accuracy: {final_learner.score(test_meta_model, self.y_test)}")


if __name__ == "__main__":
    # Build the ensemble, load and split the data, then run the blending
    # pipeline a single time (a repeated call would retrain every model).
    ensemble = Ensemble()
    ensemble.load_data()
    ensemble.BlendingClassifier()
Binary file added requirements.txt
Binary file not shown.
44 changes: 23 additions & 21 deletions stacking.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression


class Ensemble:
def __init__(self):
self.x_train = None
Expand All @@ -20,17 +21,17 @@ def __init__(self):
def load_data(self):
    """Load the breast-cancer dataset and split it 70/30 into train and test sets.

    Populates ``self.x_train``, ``self.x_test``, ``self.y_train`` and
    ``self.y_test``; the split uses ``random_state=23``.
    """
    features, labels = load_breast_cancer(return_X_y=True)
    split = train_test_split(features, labels, test_size=0.3, random_state=23)
    self.x_train, self.x_test, self.y_train, self.y_test = split

def StackingClassifier(self):

# Define weak learners
weak_learners = [('dt', DecisionTreeClassifier()),
('knn', KNeighborsClassifier()),
('rf', RandomForestClassifier()),
('gb', GradientBoostingClassifier()),
('gn', GaussianNB())]
# Finaler learner or meta model
('knn', KNeighborsClassifier()),
('rf', RandomForestClassifier()),
('gb', GradientBoostingClassifier()),
('gn', GaussianNB())]

# Final learner or meta model
final_learner = LogisticRegression()

train_meta_model = None
Expand All @@ -40,12 +41,12 @@ def StackingClassifier(self):
for clf_id, clf in weak_learners:
# Predictions for each classifier based on k-fold
predictions_clf = self.k_fold_cross_validation(clf)

# Predictions for test set for each classifier based on train of level 0
test_predictions_clf = self.train_level_0(clf)

# Stack predictions which will form
# the inputa data for the data model
# the input data for the data model
if isinstance(train_meta_model, np.ndarray):
train_meta_model = np.vstack((train_meta_model, predictions_clf))
else:
Expand All @@ -57,19 +58,18 @@ def StackingClassifier(self):
test_meta_model = np.vstack((test_meta_model, test_predictions_clf))
else:
test_meta_model = test_predictions_clf

# Transpose train_meta_model
train_meta_model = train_meta_model.T

# Transpose test_meta_model
test_meta_model = test_meta_model.T

# Training level 1
self.train_level_1(final_learner, train_meta_model, test_meta_model)


def k_fold_cross_validation(self, clf):

predictions_clf = None

# Number of samples per fold
Expand All @@ -87,14 +87,16 @@ def k_fold_cross_validation(self, clf):
test = self.x_train[(batch_size * fold): (batch_size * (fold + 1)), :]
batch_start = batch_size * fold
batch_finish = batch_size * (fold + 1)

# test & training samples for each fold iteration
fold_x_test = self.x_train[batch_start:batch_finish, :]
fold_x_train = self.x_train[[index for index in range(self.x_train.shape[0]) if index not in range(batch_start, batch_finish)], :]
fold_x_train = self.x_train[[index for index in range(self.x_train.shape[0]) if
index not in range(batch_start, batch_finish)], :]

# test & training targets for each fold iteration
fold_y_test = self.y_train[batch_start:batch_finish]
fold_y_train = self.y_train[[index for index in range(self.x_train.shape[0]) if index not in range(batch_start, batch_finish)]]
fold_y_train = self.y_train[
[index for index in range(self.x_train.shape[0]) if index not in range(batch_start, batch_finish)]]

# Fit current classifier
clf.fit(fold_x_train, fold_y_train)
Expand All @@ -113,18 +115,18 @@ def train_level_0(self, clf):
clf.fit(self.x_train, self.y_train)
# Get predictions from full real test set
y_pred = clf.predict(self.x_test)

return y_pred

def train_level_1(self, final_learner, train_meta_model, test_meta_model):
    """Fit the meta model and print its train and test accuracy.

    Args:
        final_learner: Estimator exposing ``fit(X, y)`` and ``score(X, y)``.
        train_meta_model: Features used to fit the meta model; fitted and
            scored against ``self.y_train``.
        test_meta_model: Features used to evaluate the meta model; scored
            against ``self.y_test``.
    """
    # Training is carried out with the final learner (meta model).
    final_learner.fit(train_meta_model, self.y_train)

    # Report each accuracy exactly once.
    print(f"Train accuracy: {final_learner.score(train_meta_model, self.y_train)}")
    print(f"Test accuracy: {final_learner.score(test_meta_model, self.y_test)}")


if __name__ == "__main__":
    # Build the ensemble, load and split the data, then run the stacking
    # pipeline a single time (a repeated call would retrain every model).
    ensemble = Ensemble()
    ensemble.load_data()
    ensemble.StackingClassifier()
Loading

0 comments on commit da20b08

Please sign in to comment.