Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add processing algorithm for balance data (SMOTETomek) #284

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
from qgis.core import (
QgsProcessing,
QgsProcessingParameterEnum,
QgsProcessingParameterMapLayer,
QgsProcessingParameterMultipleLayers,
QgsProcessingParameterNumber,
QgsProcessingParameterRasterDestination,
)

from eis_qgis_plugin.eis_processing.eis_processing_algorithm import EISProcessingAlgorithm


class EISBalanceData(EISProcessingAlgorithm):
    """Processing algorithm definition for the "Balance data" tool.

    This class only declares metadata (name, group, help text) and the
    parameters shown in the processing dialog. Per the help text, the
    balancing itself uses the SMOTETomek resampling method.
    """

    def __init__(self) -> None:
        """Set the algorithm's identifiers, group and short help text."""
        super().__init__()

        self._name = "balance_data"
        self._display_name = "Balance data"
        self._group = "Training data tools"
        self._group_id = "training_data_tools"
        # Built from explicit pieces so that the help text does not depend on
        # how this source file happens to be indented.
        self._short_help_string = (
            "\n"
            "Balances the classes of input dataset using SMOTETomek resampling method.\n"
            "\n"
            "For more information about Imblearn SMOTETomek read the documentation here:\n"
            "https://imbalanced-learn.org/stable/references/generated/imblearn.combine.SMOTETomek.html.\n"
        )

    def initAlgorithm(self, config=None) -> None:
        """Declare the input and output parameters of the algorithm.

        Args:
            config: Optional configuration passed in by the caller; it is not
                used by this method.
        """
        # Parameter names, in declaration order. The indices below refer to
        # positions in this list.
        # NOTE(review): presumably also consumed by EISProcessingAlgorithm
        # when the tool is executed -- confirm before reordering.
        self.alg_parameters = [
            "input_rasters",
            "target_labels",
            "sampling_strategy",
            "random_state",
            "output_raster",
            "output_labels",
        ]

        input_raster_param = QgsProcessingParameterMultipleLayers(
            name=self.alg_parameters[0],
            description="Input data",
            layerType=QgsProcessing.TypeRaster,
        )
        input_raster_param.setHelp("Input data to be resampled.")
        self.addParameter(input_raster_param)

        target_labels_param = QgsProcessingParameterMapLayer(
            name=self.alg_parameters[1],
            description="Target labels",
        )
        target_labels_param.setHelp("Labels corresponding to input data.")
        self.addParameter(target_labels_param)

        sampling_strategies = ["minority", "not minority", "not majority", "all", "auto"]
        sampling_strategy_param = QgsProcessingParameterEnum(
            name=self.alg_parameters[2],
            description="Sampling strategy",
            options=sampling_strategies,
            # Without usesStaticStrings=True an enum parameter's value is the
            # index of the option, so the default must be an index and not
            # the option text.
            defaultValue=sampling_strategies.index("auto"),
        )
        sampling_strategy_param.setHelp(
            "Sampling strategy to use. 'minority' resamples only the minority class, 'not minority' "
            "resamples all classes but the minority class, 'not majority' resamples all classes "
            "but the majority class, 'all' resamples all classes and 'auto' is equivalent to 'not majority'."
        )
        self.addParameter(sampling_strategy_param)

        random_state_param = QgsProcessingParameterNumber(
            name=self.alg_parameters[3],
            description="Random state",
            optional=True,
            minValue=0,
        )
        random_state_param.setHelp("Seed for random number generation.")
        self.addParameter(random_state_param)

        output_raster_param = QgsProcessingParameterRasterDestination(
            name=self.alg_parameters[4],
            description="Output raster",
        )
        output_raster_param.setHelp("Resampled feature data.")
        self.addParameter(output_raster_param)

        output_labels_param = QgsProcessingParameterRasterDestination(
            name=self.alg_parameters[5],
            description="Output labels",
        )
        output_labels_param.setHelp("Labels corresponding to resampled feature data.")
        self.addParameter(output_labels_param)
3 changes: 2 additions & 1 deletion eis_qgis_plugin/eis_processing/eis_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def loadAlgorithms(self) -> None:
raster_processing = self.load_algorithms_from_directory("raster_processing")
exploratory_analysis = self.load_algorithms_from_directory("exploratory_analysis")
prediction = self.load_algorithms_from_directory("prediction")
training_data_tools = self.load_algorithms_from_directory("training_data_tools")
transformations = self.load_algorithms_from_directory("transformations")
transformations_coda = self.load_algorithms_from_directory("transformations_coda")
utilities = self.load_algorithms_from_directory("utilities")
Expand All @@ -41,7 +42,7 @@ def loadAlgorithms(self) -> None:
for algorithm in (
evaluation + vector_processing + raster_processing +
exploratory_analysis + prediction + transformations + utilities +
filtering + transformations_coda
filtering + transformations_coda + training_data_tools
):
self.addAlgorithm(algorithm)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,5 +70,5 @@ def __init__(self, parent, model_main) -> None:
self.open_single_ilr_btn.clicked.connect(lambda _: processing.execAlgorithmDialog('eis:single_ilr_transform'))
self.open_single_ilr_btn.setIcon(QIcon(QgsApplication.getThemeIcon("processingAlgorithm.svg")))

# self.open_balance_data_btn.clicked.connect(lambda _: processing.execAlgorithmDialog('eis:balance_data'))
self.open_balance_data_btn.clicked.connect(lambda _: processing.execAlgorithmDialog('eis:balance_data'))
self.open_balance_data_btn.setIcon(QIcon(QgsApplication.getThemeIcon("processingAlgorithm.svg")))
4 changes: 2 additions & 2 deletions eis_qgis_plugin/resources/ui/modeling/data_preparation.ui
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@
<item row="0" column="0">
<widget class="QLabel" name="label_2">
<property name="enabled">
<bool>false</bool>
<bool>true</bool>
</property>
<property name="text">
<string>Balance data</string>
Expand All @@ -344,7 +344,7 @@
<item row="0" column="1">
<widget class="QPushButton" name="open_balance_data_btn">
<property name="enabled">
<bool>false</bool>
<bool>true</bool>
</property>
<property name="text">
<string>Open</string>
Expand Down
Loading