From 8fb32a608bef830f8a3e00bf41334b241ad776d4 Mon Sep 17 00:00:00 2001 From: Carlos Sevilla Salcedo Date: Tue, 19 Dec 2023 16:39:27 +0100 Subject: [PATCH] Add InputData Module --- src/vai_lab/Data/Data_core.py | 38 ++++++++++--- src/vai_lab/InputData/InputData_core.py | 55 ++++++++++-------- src/vai_lab/InputData/plugins/Input.py | 39 +++++++++++++ src/vai_lab/_plugin_templates.py | 36 ++++++++++++ .../examples/xml_files/InputData_demo.xml | 56 +++++++++++++++++++ 5 files changed, 195 insertions(+), 29 deletions(-) create mode 100644 src/vai_lab/InputData/plugins/Input.py create mode 100644 src/vai_lab/examples/xml_files/InputData_demo.xml diff --git a/src/vai_lab/Data/Data_core.py b/src/vai_lab/Data/Data_core.py index fccb1c43..9e777fbf 100644 --- a/src/vai_lab/Data/Data_core.py +++ b/src/vai_lab/Data/Data_core.py @@ -28,7 +28,11 @@ def __init__(self: DataT) -> None: def _import_csv(self: DataT, filename: str, data_name: str, - strip_whitespace: bool = True) -> None: + strip_whitespace: bool = True, + index_col = None, + delimiter=',', + quotechar='|', + usecols=None) -> None: """import data directly into DataFrame :param filename: str, filename of csv file to be loaded :param data_name: str, name of dict key in which data will be stored @@ -36,15 +40,21 @@ def _import_csv(self: DataT, TODO: pandas has a lot of inbuilt read functions, including excel - implement """ self.data[data_name] = pd.read_csv(filename, - delimiter=',', - quotechar='|') + delimiter=delimiter, + quotechar=quotechar, + index_col = index_col, + usecols = usecols) if strip_whitespace: self.data[data_name].columns = [c.strip() for c in self.data[data_name].columns] def _import_png(self: DataT, filename: str, - data_name: str) -> None: + data_name: str, + index_col = None, + delimiter=',', + quotechar='|', + usecols=None) -> None: """Loads png into PIL.Image class. Adds instance to self.data The image is stored as a function (not a matrix - can be added if needed) :param filename: str, filename of csv file to be loaded @@ -56,7 +66,11 @@ def _import_png(self: DataT, def _import_dir(self: DataT, folder_dir: str, - data_name: str) -> None: + data_name: str, + index_col = None, + delimiter=',', + quotechar='|', + usecols=None) -> None: """Explores folder, and imports all data items recursively :param folder_dir: str, directory to be explored @@ -85,7 +99,11 @@ def _get_ext(self: DataT, path_dir: str) -> str: def import_data(self: DataT, filename: str, - data_name: str = "data") -> None: + data_name: str = "data", + index_col = None, + delimiter=',', + quotechar='|', + usecols=None) -> None: """Import file directly into DataFrame Translates relative files to absolute before parsing - not ideal Filename to parsing method based on extension name. @@ -94,7 +112,13 @@ def import_data(self: DataT, """ filename = rel_to_abs(filename) ext = self._get_ext(filename) - getattr(self, "_import_{0}".format(ext))(filename, data_name) + getattr(self, "_import_{0}".format(ext))(filename, + data_name, + index_col = index_col, + delimiter=delimiter, + quotechar=quotechar, + usecols=usecols) + return self.data def import_data_from_config(self: DataT, config: dict) -> None: for c in config.keys(): diff --git a/src/vai_lab/InputData/InputData_core.py b/src/vai_lab/InputData/InputData_core.py index 883d7c99..0a2b93e9 100644 --- a/src/vai_lab/InputData/InputData_core.py +++ b/src/vai_lab/InputData/InputData_core.py @@ -1,32 +1,20 @@ from vai_lab.Data.Data_core import Data from vai_lab._import_helper import import_plugin_absolute from vai_lab._types import PluginSpecsInterface, DataInterface +from pandas import DataFrame +from numpy import array -class InputData(Data): - def __init__(self): - super().__init__() - self.node_name = None - self.plugin_name = None - self.output_data = None +class InputData(object): + def __init__(self) -> None: + self.output_data: DataInterface + + def set_avail_plugins(self, avail_plugins: PluginSpecsInterface) -> None: + self._avail_plugins = avail_plugins def set_data_in(self, data_in: DataInterface) -> None: """Pass existing data from another module to be stored in this class""" self._data_in = data_in - def load_data_from_file(self, filename: str, data_id: str) -> None: - """Load data from file. Calls parent class method to store data in self.data""" - super().import_data(filename, data_id) - - def set_options(self, module_config: dict) -> None: - """Send configuration arguments to plugin - - :param module_config: dict of settings to configure the plugin - """ - self._module_config = module_config - - def set_avail_plugins(self, avail_plugins: PluginSpecsInterface) -> None: - self._avail_plugins = avail_plugins - def _load_plugin(self, data_in: DataInterface) -> None: avail_plugins = self._avail_plugins.find_from_readable_name( self._module_config["plugin"]["plugin_name"]) @@ -36,5 +24,28 @@ def _load_plugin(self, data_in: DataInterface) -> None: avail_plugins["_PLUGIN_CLASS_NAME"])\ .__call__(self._module_config["plugin"], data_in) - def get_result(self): - return self._data_in + def set_options(self, module_config: dict) -> None: + """Send configuration arguments to plugin + + :param module_config: dict of settings to configure the plugin + """ + self._module_config = module_config + + def launch(self) -> None: + + for method in self._module_config["plugin"]["methods"]["_order"]: + if "options" in self._module_config["plugin"]["methods"][method].keys(): + out = getattr(self._plugin, "{}".format(method))(self._plugin._parse_options_dict(self._module_config["plugin"]["methods"][method]["options"])) + else: + out = getattr(self._plugin, "{}".format(method))() + + if len(self._module_config["plugin"]["methods"]["_order"]) > 0: + try: + out = out[0][next(iter(out[0]))] + self.output_data = self._data_in.copy() + self.output_data.data[list(out[1])[0]] = out + except: + return + + def get_result(self) -> DataInterface: + return self.output_data \ No newline at end of file diff --git a/src/vai_lab/InputData/plugins/Input.py b/src/vai_lab/InputData/plugins/Input.py new file mode 100644 index 00000000..256da432 --- /dev/null +++ b/src/vai_lab/InputData/plugins/Input.py @@ -0,0 +1,39 @@ +from vai_lab._plugin_templates import InputDataPluginT + +from vai_lab.Data.Data_core import Data as model +import pandas as pd + +_PLUGIN_READABLE_NAMES = {"Input": "default", + "input": "alias"} # type:ignore +_PLUGIN_MODULE_OPTIONS = {} # type:ignore +_PLUGIN_REQUIRED_SETTINGS = {} # type:ignore +_PLUGIN_OPTIONAL_SETTINGS = {} # type:ignore +_PLUGIN_REQUIRED_DATA = {} # type:ignore +_PLUGIN_OPTIONAL_DATA = {"X", "Y", "X_tst", 'Y_tst'} # type:ignore + +class Input(InputDataPluginT): + """ + Import data to the pipeline or append column to existing data + """ + + def __init__(self, config = {}, data_in = [None], ini = False): + """Initialises parent class. + Passes `globals` dict of all current variables + """ + super().__init__(globals()) + if not ini: + # Model configuration + self.set_data_in(data_in) + self.configure(config) + # Model initialisation + try: + self.model = model(**self._config["options"]) + except Exception as exc: + print('The plugin encountered an error on the parameters of ' + +str(list(self._PLUGIN_READABLE_NAMES.keys())[list(self._PLUGIN_READABLE_NAMES.values()).index('default')])+': '+str(exc)+'.') + raise + else: + self.model = model + + self.import_plugin = self.model.import_data + self.append_plugin = self.model.append_data_column \ No newline at end of file diff --git a/src/vai_lab/_plugin_templates.py b/src/vai_lab/_plugin_templates.py index 18ced04e..ea2423d8 100644 --- a/src/vai_lab/_plugin_templates.py +++ b/src/vai_lab/_plugin_templates.py @@ -380,6 +380,42 @@ def save_file_as(self): pass +class InputDataPluginT(PluginTemplate, ABC): + def __init__(self, plugin_globals: dict) -> None: + super().__init__(plugin_globals) + + def import_data(self, options={}): + """Sends params to import data, then import data""" + try: + if isinstance(options, list): + return self.import_plugin(*options) + if isinstance(options, dict): + return self.import_plugin(**options), options.keys() + else: + return self.import_plugin(options) + except Exception as exc: + print('The plugin encountered an error when importing ' + +str(list(self._PLUGIN_READABLE_NAMES.keys())[list(self._PLUGIN_READABLE_NAMES.values()).index('default')])+': '+str(exc)+'.') + raise + + def append_data_column(self, options={}): + """ Appends a column to the dataframe + :returns: array, shape (n_samples,) + Returns predicted values. + """ + try: + if isinstance(options, list): + return self.append_plugin(*options) + elif isinstance(options, dict): + return self.append_plugin(**options) + else: + return self.append_plugin(options) + + except Exception as exc: + print('The plugin encountered an error when appending ' + +str(list(self._PLUGIN_READABLE_NAMES.keys())[list(self._PLUGIN_READABLE_NAMES.values()).index('default')])+': '+str(exc)+'.') + raise + class EnvironmentPluginT(PluginTemplate, ABC): @abstractmethod diff --git a/src/vai_lab/examples/xml_files/InputData_demo.xml b/src/vai_lab/examples/xml_files/InputData_demo.xml new file mode 100644 index 00000000..f8248ed3 --- /dev/null +++ b/src/vai_lab/examples/xml_files/InputData_demo.xml @@ -0,0 +1,56 @@ + + + + + + + [(350.0, 50), 0, {}] + + + + + + + + + + + + [(350.0, 350.0), 2, {0: 'd0-u2'}] + + + + + + + .\examples\crystalDesign\20190606-R1-JT\BMP\RGB\Calibrated\Samples.csv + + + 0 + + + + + + + + + + + + + + [(350.0, 650), 1, {2: 'd2-u1'}] + + + + + Input Data + + + .\examples\results\output.pkl + + + + +