From 73df97cc58ebd8fbffc1c1e78bf71cabc46eefc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 7 Sep 2024 21:26:45 +0100 Subject: [PATCH 01/11] table examples, base class --- skpro/datatypes/_base/__init__.py | 4 +- skpro/datatypes/_base/_base.py | 37 ++++ skpro/datatypes/_table/_examples.py | 261 ++++++++++++++++++++-------- 3 files changed, 231 insertions(+), 71 deletions(-) diff --git a/skpro/datatypes/_base/__init__.py b/skpro/datatypes/_base/__init__.py index b6727e858..98113b454 100644 --- a/skpro/datatypes/_base/__init__.py +++ b/skpro/datatypes/_base/__init__.py @@ -1,5 +1,5 @@ """Base module for datatypes.""" -from skpro.datatypes._base._base import BaseConverter, BaseDatatype +from skpro.datatypes._base._base import BaseConverter, BaseDatatype, BaseExample -__all__ = ["BaseConverter", "BaseDatatype"] +__all__ = ["BaseConverter", "BaseDatatype", "BaseExample"] diff --git a/skpro/datatypes/_base/_base.py b/skpro/datatypes/_base/_base.py index 65d36e3e8..7552e8f80 100644 --- a/skpro/datatypes/_base/_base.py +++ b/skpro/datatypes/_base/_base.py @@ -328,6 +328,43 @@ def _get_key(self): return (mtype_from, mtype_to, scitype) +class BaseExample(BaseObject): + """Base class for Example fixtures used in tests and get_examples.""" + + _tags = { + "object_type": "datatype_example", + "scitype": None, + "mtype": None, + "python_version": None, + "python_dependencies": None, + "index": None, # integer index of the example to match with other mtypes + "lossy": False, # whether the example is lossy + } + + def __init__(self): + super().__init__() + + def _get_key(self): + """Get unique dictionary key corresponding to self. + + Private function, used in collecting a dictionary of examples. + """ + mtype = self.get_class_tag("mtype") + scitype = self.get_class_tag("scitype") + index = self.get_class_tag("index") + return (mtype, scitype, index) + + def build(self): + """Build example. + + Returns + ------- + obj : any + Example object. + """ + raise NotImplementedError + + def _coerce_str_to_cls(cls_or_str): """Get class from string. diff --git a/skpro/datatypes/_table/_examples.py b/skpro/datatypes/_table/_examples.py index 6cd416870..8fa20b6b9 100644 --- a/skpro/datatypes/_table/_examples.py +++ b/skpro/datatypes/_table/_examples.py @@ -24,6 +24,7 @@ import numpy as np import pandas as pd +from skpro.datatypes._base import BaseExample from skpro.utils.validation._dependencies import _check_soft_dependencies example_dict = dict() @@ -33,100 +34,222 @@ ### # example 0: univariate -df = pd.DataFrame({"a": [1, 4, 0.5, -3]}) -example_dict[("pd_DataFrame_Table", "Table", 0)] = df -example_dict_lossy[("pd_DataFrame_Table", "Table", 0)] = False +class UnivTable(BaseExample): -arr = np.array([[1], [4], [0.5], [-3]]) + _tags = { + "scitype": "Table", + "index": 0, + "metadata": { + "is_univariate": True, + "is_empty": False, + "has_nans": False, + "n_instances": 4, + "n_features": 1, + "feature_names": ["a"], + }, + } -example_dict[("numpy2D", "Table", 0)] = arr -example_dict_lossy[("numpy2D", "Table", 0)] = True -arr = np.array([1, 4, 0.5, -3]) +class UnivTableDf(UnivTable): -example_dict[("numpy1D", "Table", 0)] = arr -example_dict_lossy[("numpy1D", "Table", 0)] = True + _tags = { + "mtype": "pd_DataFrame_Table", + "python_dependencies": None, + "lossy": False, + } -series = pd.Series([1, 4, 0.5, -3]) + def build(self): + return pd.DataFrame({"a": [1, 4, 0.5, -3]}) -example_dict[("pd_Series_Table", "Table", 0)] = series -example_dict_lossy[("pd_Series_Table", "Table", 0)] = True -list_of_dict = [{"a": 1.0}, {"a": 4.0}, {"a": 0.5}, {"a": -3.0}] +class UnivTableNumpy2D(UnivTable): -example_dict[("list_of_dict", "Table", 0)] = list_of_dict -example_dict_lossy[("list_of_dict", "Table", 0)] = False + _tags = { + "mtype": "numpy2D", + "python_dependencies": None, + "lossy": True, + } -if _check_soft_dependencies(["polars", "pyarrow"], severity="none"): - from skpro.datatypes._adapter.polars import convert_pandas_to_polars_with_index + def build(self): + return np.array([[1], [4], [0.5], [-3]]) - example_dict[ - ("polars_eager_table", "Table", 0) - ] = convert_pandas_to_polars_with_index(df) - example_dict_lossy[("polars_eager_table", "Table", 0)] = False - example_dict[ - ("polars_lazy_table", "Table", 0) - ] = convert_pandas_to_polars_with_index(df, lazy=True) - example_dict_lossy[("polars_lazy_table", "Table", 0)] = False +class UnivTableNumpy1D(UnivTable): + + _tags = { + "mtype": "numpy1D", + "python_dependencies": None, + "lossy": True, + } + + def build(self): + return np.array([1, 4, 0.5, -3]) + + +class UnivTableSeries(UnivTable): + + _tags = { + "mtype": "pd_Series_Table", + "python_dependencies": None, + "lossy": True, + } + + def build(self): + return pd.Series([1, 4, 0.5, -3]) + + +class UnivTableListOfDict(UnivTable): + + _tags = { + "mtype": "list_of_dict", + "python_dependencies": None, + "lossy": False, + } + + def build(self): + return [{"a": 1.0}, {"a": 4.0}, {"a": 0.5}, {"a": -3.0}] + + +class UnivTablePolarsEager(UnivTable): + + _tags = { + "mtype": "polars_eager_table", + "python_dependencies": ["polars", "pyarrow"], + "lossy": False, + } + + def build(self): + from skpro.datatypes._adapter.polars import convert_pandas_to_polars_with_index + + df = pd.DataFrame({"a": [1, 4, 0.5, -3]}) + return convert_pandas_to_polars_with_index(df) + + +class UnivTablePolarsLazy(UnivTable): + + _tags = { + "mtype": "polars_lazy_table", + "python_dependencies": ["polars", "pyarrow"], + "lossy": False, + } + + def build(self): + from skpro.datatypes._adapter.polars import convert_pandas_to_polars_with_index + + df = pd.DataFrame({"a": [1, 4, 0.5, -3]}) + return convert_pandas_to_polars_with_index(df, lazy=True) -example_dict_metadata[("Table", 0)] = { - "is_univariate": True, - "is_empty": False, - "has_nans": False, - "n_instances": 4, - "n_features": 1, - "feature_names": ["a"], -} ### # example 1: multivariate -example_dict[("numpy1D", "Table", 1)] = None -example_dict_lossy[("numpy1D", "Table", 1)] = None +class MultivTable(BaseExample): + + _tags = { + "scitype": "Table", + "index": 1, + "metadata": { + "is_univariate": False, + "is_empty": False, + "has_nans": False, + "n_instances": 4, + "n_features": 2, + "feature_names": ["a", "b"], + }, + } + + +class MultivTableDf(MultivTable): + + _tags = { + "mtype": "pd_DataFrame_Table", + "python_dependencies": None, + "lossy": False, + } + + def build(self): + return pd.DataFrame({"a": [1, 4, 0.5, -3], "b": [3, 7, 2, -3 / 7]}) + + +class MultivTableNumpy2D(MultivTable): + + _tags = { + "mtype": "numpy2D", + "python_dependencies": None, + "lossy": True, + } + + def build(self): + return np.array([[1, 3], [4, 7], [0.5, 2], [-3, -3 / 7]]) + + +class MultivTableNumpy1D(MultivTable): + + _tags = { + "mtype": "numpy1D", + "python_dependencies": None, + "lossy": None, + } + + def build(self): + return None + + +class MultivTableSeries(MultivTable): + + _tags = { + "mtype": "pd_Series_Table", + "python_dependencies": None, + "lossy": None, + } + + def build(self): + return None + + +class MultivTableListOfDict(MultivTable): + + _tags = { + "mtype": "list_of_dict", + "python_dependencies": None, + "lossy": False, + } -df = pd.DataFrame({"a": [1, 4, 0.5, -3], "b": [3, 7, 2, -3 / 7]}) + def build(self): + return [ + {"a": 1.0, "b": 3.0}, + {"a": 4.0, "b": 7.0}, + {"a": 0.5, "b": 2.0}, + {"a": -3.0, "b": -3 / 7}, + ] -example_dict[("d_DataFrame_Table", "Table", 1)] = df -example_dict_lossy[("pd_DataFrame_Table", "Table", 1)] = False -arr = np.array([[1, 3], [4, 7], [0.5, 2], [-3, -3 / 7]]) +class MultivTablePolarsEager(MultivTable): -example_dict[("numpy2D", "Table", 1)] = arr -example_dict_lossy[("numpy2D", "Table", 1)] = True + _tags = { + "mtype": "polars_eager_table", + "python_dependencies": ["polars", "pyarrow"], + "lossy": False, + } -example_dict[("pd_Series_Table", "Table", 1)] = None -example_dict_lossy[("pd_Series_Table", "Table", 1)] = None + def build(self): + from skpro.datatypes._adapter.polars import convert_pandas_to_polars_with_index -list_of_dict = [ - {"a": 1.0, "b": 3.0}, - {"a": 4.0, "b": 7.0}, - {"a": 0.5, "b": 2.0}, - {"a": -3.0, "b": -3 / 7}, -] + df = pd.DataFrame({"a": [1, 4, 0.5, -3], "b": [3, 7, 2, -3 / 7]}) + return convert_pandas_to_polars_with_index(df) -example_dict[("list_of_dict", "Table", 1)] = list_of_dict -example_dict_lossy[("list_of_dict", "Table", 1)] = False -if _check_soft_dependencies(["polars", "pyarrow"], severity="none"): - from skpro.datatypes._adapter.polars import convert_pandas_to_polars_with_index +class MultivTablePolarsLazy(MultivTable): - example_dict[ - ("polars_eager_table", "Table", 1) - ] = convert_pandas_to_polars_with_index(df) - example_dict_lossy[("polars_eager_table", "Table", 1)] = False + _tags = { + "mtype": "polars_lazy_table", + "python_dependencies": ["polars", "pyarrow"], + "lossy": False, + } - example_dict[ - ("polars_lazy_table", "Table", 1) - ] = convert_pandas_to_polars_with_index(df, lazy=True) - example_dict_lossy[("polars_lazy_table", "Table", 1)] = False + def build(self): + from skpro.datatypes._adapter.polars import convert_pandas_to_polars_with_index -example_dict_metadata[("Table", 1)] = { - "is_univariate": False, - "is_empty": False, - "has_nans": False, - "n_instances": 4, - "n_features": 2, - "feature_names": ["a", "b"], -} + df = pd.DataFrame({"a": [1, 4, 0.5, -3], "b": [3, 7, 2, -3 / 7]}) + return convert_pandas_to_polars_with_index(df, lazy=True) From aba28a194daaaf189ab1cf4dd00d32af436d6310 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 7 Sep 2024 22:23:16 +0100 Subject: [PATCH 02/11] generation via lookup --- skpro/datatypes/_examples.py | 52 +++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/skpro/datatypes/_examples.py b/skpro/datatypes/_examples.py index 9865210c3..034ae51fd 100644 --- a/skpro/datatypes/_examples.py +++ b/skpro/datatypes/_examples.py @@ -13,6 +13,8 @@ e.g., metadata such as column names are missing """ +from functools import lru_cache + from skpro.datatypes._registry import mtype_to_scitype __author__ = ["fkiraly"] @@ -26,24 +28,42 @@ example_dict_metadata_Proba, example_dict_Proba, ) -from skpro.datatypes._table import ( - example_dict_lossy_Table, - example_dict_metadata_Table, - example_dict_Table, -) -# pool example_dict-s -example_dict = dict() -example_dict.update(example_dict_Proba) -example_dict.update(example_dict_Table) -example_dict_lossy = dict() -example_dict_lossy.update(example_dict_lossy_Proba) -example_dict_lossy.update(example_dict_lossy_Table) +@lru_cache(maxsize=1) +def generate_example_dicts(soft_deps="present"): + """Generate example dicts using lookup.""" + from skbase.utils.dependencies import _check_estimator_deps + + from skpro.datatypes._base import BaseExample + from skpro.utils.retrieval import _all_classes + + classes = _all_classes("skpro.datatypes") + classes = [x[1] for x in classes] + classes = [x for x in classes if issubclass(x, BaseExample)] + classes = [x for x in classes if not x.__name__.startswith("Base")] -example_dict_metadata = dict() -example_dict_metadata.update(example_dict_metadata_Proba) -example_dict_metadata.update(example_dict_metadata_Table) + # subset only to data types with soft dependencies present + if soft_deps == "present": + classes = [x for x in classes if _check_estimator_deps(x, severity="none")] + + example_dict = dict() + example_dict_lossy = dict() + example_dict_metadata = dict() + for cls in classes: + k = cls() + key = k._get_key() + key_meta = (key[1], key[2]) + example_dict[key] = k + example_dict_lossy[key] = k.get_class_tags().get("lossy", False) + example_dict_metadata[key_meta] = k.get_class_tags().get("metadata", {}) + + # temporary while refactoring + example_dict.update(example_dict_Proba) + example_dict_lossy.update(example_dict_lossy_Proba) + example_dict_metadata.update(example_dict_metadata_Proba) + + return example_dict, example_dict_lossy, example_dict_metadata def get_examples( @@ -79,6 +99,8 @@ def get_examples( if as_scitype is None: as_scitype = mtype_to_scitype(mtype) + example_dict, example_dict_lossy, example_dict_metadata = generate_example_dicts() + # retrieve all keys that match the query exkeys = example_dict.keys() keys = [k for k in exkeys if k[0] == mtype and k[1] == as_scitype] From 5a2880c1432c8347b524ab6e4bccba782c52adc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 7 Sep 2024 22:49:57 +0100 Subject: [PATCH 03/11] proba --- skpro/datatypes/_examples.py | 5 - skpro/datatypes/_proba/_examples.py | 146 +++++++++++++++++++--------- skpro/datatypes/_table/_examples.py | 2 - 3 files changed, 99 insertions(+), 54 deletions(-) diff --git a/skpro/datatypes/_examples.py b/skpro/datatypes/_examples.py index 034ae51fd..456827a92 100644 --- a/skpro/datatypes/_examples.py +++ b/skpro/datatypes/_examples.py @@ -58,11 +58,6 @@ def generate_example_dicts(soft_deps="present"): example_dict_lossy[key] = k.get_class_tags().get("lossy", False) example_dict_metadata[key_meta] = k.get_class_tags().get("metadata", {}) - # temporary while refactoring - example_dict.update(example_dict_Proba) - example_dict_lossy.update(example_dict_lossy_Proba) - example_dict_metadata.update(example_dict_metadata_Proba) - return example_dict, example_dict_lossy, example_dict_metadata diff --git a/skpro/datatypes/_proba/_examples.py b/skpro/datatypes/_proba/_examples.py index a87f18269..c81912522 100644 --- a/skpro/datatypes/_proba/_examples.py +++ b/skpro/datatypes/_proba/_examples.py @@ -31,64 +31,116 @@ import numpy as np import pandas as pd -example_dict = dict() -example_dict_lossy = dict() -example_dict_metadata = dict() +from skpro.datatypes._base import BaseExample ### # example 0: univariate -pred_q = pd.DataFrame({0.2: [1, 2, 3], 0.6: [2, 3, 4]}) -pred_q.columns = pd.MultiIndex.from_product([["foo"], [0.2, 0.6]]) +class ProbaUniv(BaseExample): -# we need to use this due to numerical inaccuracies from the binary based representation -pseudo_0_2 = 2 * np.abs(0.6 - 0.5) + _tags = { + "scitype": "Proba", + "index": 0, + "metadata": { + "is_univariate": True, + "is_empty": False, + "has_nans": False, + }, + } -example_dict[("pred_quantiles", "Proba", 0)] = pred_q -example_dict_lossy[("pred_quantiles", "Proba", 0)] = False -pred_int = pd.DataFrame({0.2: [1, 2, 3], 0.6: [2, 3, 4]}) -pred_int.columns = pd.MultiIndex.from_tuples( - [("foo", 0.6, "lower"), ("foo", pseudo_0_2, "upper")] -) +class ProbaUnivPredQ(ProbaUniv): -example_dict[("pred_interval", "Proba", 0)] = pred_int -example_dict_lossy[("pred_interval", "Proba", 0)] = False + _tags = { + "mtype": "pred_quantiles", + "python_dependencies": None, + "lossy": False, + } + def build(self): + pred_q = pd.DataFrame({0.2: [1, 2, 3], 0.6: [2, 3, 4]}) + pred_q.columns = pd.MultiIndex.from_product([["foo"], [0.2, 0.6]]) + + return pred_q + + +class ProbaUnivPredInt(ProbaUniv): + + _tags = { + "mtype": "pred_interval", + "python_dependencies": None, + "lossy": False, + } + + def build(self): + # we need to use this due to numerical inaccuracies + # from the binary based representation + pseudo_0_2 = 2 * np.abs(0.6 - 0.5) + + pred_int = pd.DataFrame({0.2: [1, 2, 3], 0.6: [2, 3, 4]}) + pred_int.columns = pd.MultiIndex.from_tuples( + [("foo", 0.6, "lower"), ("foo", pseudo_0_2, "upper")] + ) + + return pred_int -example_dict_metadata[("Proba", 0)] = { - "is_univariate": True, - "is_empty": False, - "has_nans": False, -} ### # example 1: multi -pred_q = pd.DataFrame({0.2: [1, 2, 3], 0.6: [2, 3, 4], 42: [5, 3, -1], 46: [5, 3, -1]}) -pred_q.columns = pd.MultiIndex.from_product([["foo", "bar"], [0.2, 0.6]]) - -example_dict[("pred_quantiles", "Proba", 1)] = pred_q -example_dict_lossy[("pred_quantiles", "Proba", 1)] = False - -pred_int = pd.DataFrame( - {0.2: [1, 2, 3], 0.6: [2, 3, 4], 42: [5, 3, -1], 46: [5, 3, -1]} -) -pred_int.columns = pd.MultiIndex.from_tuples( - [ - ("foo", 0.6, "lower"), - ("foo", pseudo_0_2, "upper"), - ("bar", 0.6, "lower"), - ("bar", pseudo_0_2, "upper"), - ] -) - -example_dict[("pred_interval", "Proba", 1)] = pred_int -example_dict_lossy[("pred_interval", "Proba", 1)] = False - - -example_dict_metadata[("Proba", 1)] = { - "is_univariate": False, - "is_empty": False, - "has_nans": False, -} +class ProbaMulti(BaseExample): + + _tags = { + "scitype": "Proba", + "index": 1, + "metadata": { + "is_univariate": False, + "is_empty": False, + "has_nans": False, + }, + } + + +class ProbaMultiPredQ(ProbaMulti): + + _tags = { + "mtype": "pred_quantiles", + "python_dependencies": None, + "lossy": False, + } + + def build(self): + pred_q = pd.DataFrame( + {0.2: [1, 2, 3], 0.6: [2, 3, 4], 42: [5, 3, -1], 46: [5, 3, -1]} + ) + pred_q.columns = pd.MultiIndex.from_product([["foo", "bar"], [0.2, 0.6]]) + + return pred_q + + +class ProbaMultiPredInt(ProbaMulti): + + _tags = { + "mtype": "pred_interval", + "python_dependencies": None, + "lossy": False, + } + + def build(self): + # we need to use this due to numerical inaccuracies + # from the binary based representation + pseudo_0_2 = 2 * np.abs(0.6 - 0.5) + + pred_int = pd.DataFrame( + {0.2: [1, 2, 3], 0.6: [2, 3, 4], 42: [5, 3, -1], 46: [5, 3, -1]} + ) + pred_int.columns = pd.MultiIndex.from_tuples( + [ + ("foo", 0.6, "lower"), + ("foo", pseudo_0_2, "upper"), + ("bar", 0.6, "lower"), + ("bar", pseudo_0_2, "upper"), + ] + ) + + return pred_int diff --git a/skpro/datatypes/_table/_examples.py b/skpro/datatypes/_table/_examples.py index 8fa20b6b9..b124fa148 100644 --- a/skpro/datatypes/_table/_examples.py +++ b/skpro/datatypes/_table/_examples.py @@ -25,7 +25,6 @@ import pandas as pd from skpro.datatypes._base import BaseExample -from skpro.utils.validation._dependencies import _check_soft_dependencies example_dict = dict() example_dict_lossy = dict() @@ -34,7 +33,6 @@ ### # example 0: univariate - class UnivTable(BaseExample): _tags = { From dbd6ca9d757e0d4b6154fcc6cbc0bce4d5029366 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 7 Sep 2024 22:58:44 +0100 Subject: [PATCH 04/11] Update _examples.py --- skpro/datatypes/_examples.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/skpro/datatypes/_examples.py b/skpro/datatypes/_examples.py index 456827a92..2ec9bc30a 100644 --- a/skpro/datatypes/_examples.py +++ b/skpro/datatypes/_examples.py @@ -23,12 +23,6 @@ "get_examples", ] -from skpro.datatypes._proba import ( - example_dict_lossy_Proba, - example_dict_metadata_Proba, - example_dict_Proba, -) - @lru_cache(maxsize=1) def generate_example_dicts(soft_deps="present"): From 5e52cca96d9bf2df94900136c86cb624cd501dfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 7 Sep 2024 23:30:08 +0100 Subject: [PATCH 05/11] remove imports --- skpro/datatypes/_proba/__init__.py | 10 ---------- skpro/datatypes/_table/__init__.py | 10 ---------- 2 files changed, 20 deletions(-) diff --git a/skpro/datatypes/_proba/__init__.py b/skpro/datatypes/_proba/__init__.py index 1c9d34736..af3ce9ccc 100644 --- a/skpro/datatypes/_proba/__init__.py +++ b/skpro/datatypes/_proba/__init__.py @@ -2,13 +2,6 @@ from skpro.datatypes._proba._check import check_dict as check_dict_Proba from skpro.datatypes._proba._convert import convert_dict as convert_dict_Proba -from skpro.datatypes._proba._examples import example_dict as example_dict_Proba -from skpro.datatypes._proba._examples import ( - example_dict_lossy as example_dict_lossy_Proba, -) -from skpro.datatypes._proba._examples import ( - example_dict_metadata as example_dict_metadata_Proba, -) from skpro.datatypes._proba._registry import MTYPE_LIST_PROBA, MTYPE_REGISTER_PROBA __all__ = [ @@ -16,7 +9,4 @@ "convert_dict_Proba", "MTYPE_LIST_PROBA", "MTYPE_REGISTER_PROBA", - "example_dict_Proba", - "example_dict_lossy_Proba", - "example_dict_metadata_Proba", ] diff --git a/skpro/datatypes/_table/__init__.py b/skpro/datatypes/_table/__init__.py index 0481dcaee..6a8fe370a 100644 --- a/skpro/datatypes/_table/__init__.py +++ b/skpro/datatypes/_table/__init__.py @@ -1,20 +1,10 @@ """Module exports: Series type checkers, converters and mtype inference.""" from skpro.datatypes._table._convert import convert_dict as convert_dict_Table -from skpro.datatypes._table._examples import example_dict as example_dict_Table -from skpro.datatypes._table._examples import ( - example_dict_lossy as example_dict_lossy_Table, -) -from skpro.datatypes._table._examples import ( - example_dict_metadata as example_dict_metadata_Table, -) from skpro.datatypes._table._registry import MTYPE_LIST_TABLE, MTYPE_REGISTER_TABLE __all__ = [ "convert_dict_Table", "MTYPE_LIST_TABLE", "MTYPE_REGISTER_TABLE", - "example_dict_Table", - "example_dict_lossy_Table", - "example_dict_metadata_Table", ] From 6441b958043b1e51650a03439778fb99d2e90f2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sun, 8 Sep 2024 10:29:04 +0100 Subject: [PATCH 06/11] linting --- skpro/datatypes/_proba/_examples.py | 6 ------ skpro/datatypes/_table/_examples.py | 32 ++++++++--------------------- 2 files changed, 8 insertions(+), 30 deletions(-) diff --git a/skpro/datatypes/_proba/_examples.py b/skpro/datatypes/_proba/_examples.py index c81912522..9e99e7b25 100644 --- a/skpro/datatypes/_proba/_examples.py +++ b/skpro/datatypes/_proba/_examples.py @@ -37,7 +37,6 @@ # example 0: univariate class ProbaUniv(BaseExample): - _tags = { "scitype": "Proba", "index": 0, @@ -50,7 +49,6 @@ class ProbaUniv(BaseExample): class ProbaUnivPredQ(ProbaUniv): - _tags = { "mtype": "pred_quantiles", "python_dependencies": None, @@ -65,7 +63,6 @@ def build(self): class ProbaUnivPredInt(ProbaUniv): - _tags = { "mtype": "pred_interval", "python_dependencies": None, @@ -89,7 +86,6 @@ def build(self): # example 1: multi class ProbaMulti(BaseExample): - _tags = { "scitype": "Proba", "index": 1, @@ -102,7 +98,6 @@ class ProbaMulti(BaseExample): class ProbaMultiPredQ(ProbaMulti): - _tags = { "mtype": "pred_quantiles", "python_dependencies": None, @@ -119,7 +114,6 @@ def build(self): class ProbaMultiPredInt(ProbaMulti): - _tags = { "mtype": "pred_interval", "python_dependencies": None, diff --git a/skpro/datatypes/_table/_examples.py b/skpro/datatypes/_table/_examples.py index b124fa148..362cd2a88 100644 --- a/skpro/datatypes/_table/_examples.py +++ b/skpro/datatypes/_table/_examples.py @@ -34,7 +34,6 @@ # example 0: univariate class UnivTable(BaseExample): - _tags = { "scitype": "Table", "index": 0, @@ -50,7 +49,6 @@ class UnivTable(BaseExample): class UnivTableDf(UnivTable): - _tags = { "mtype": "pd_DataFrame_Table", "python_dependencies": None, @@ -62,7 +60,6 @@ def build(self): class UnivTableNumpy2D(UnivTable): - _tags = { "mtype": "numpy2D", "python_dependencies": None, @@ -74,7 +71,6 @@ def build(self): class UnivTableNumpy1D(UnivTable): - _tags = { "mtype": "numpy1D", "python_dependencies": None, @@ -86,7 +82,6 @@ def build(self): class UnivTableSeries(UnivTable): - _tags = { "mtype": "pd_Series_Table", "python_dependencies": None, @@ -98,7 +93,6 @@ def build(self): class UnivTableListOfDict(UnivTable): - _tags = { "mtype": "list_of_dict", "python_dependencies": None, @@ -110,7 +104,6 @@ def build(self): class UnivTablePolarsEager(UnivTable): - _tags = { "mtype": "polars_eager_table", "python_dependencies": ["polars", "pyarrow"], @@ -125,7 +118,6 @@ def build(self): class UnivTablePolarsLazy(UnivTable): - _tags = { "mtype": "polars_lazy_table", "python_dependencies": ["polars", "pyarrow"], @@ -143,7 +135,6 @@ def build(self): # example 1: multivariate class MultivTable(BaseExample): - _tags = { "scitype": "Table", "index": 1, @@ -159,7 +150,6 @@ class MultivTable(BaseExample): class MultivTableDf(MultivTable): - _tags = { "mtype": "pd_DataFrame_Table", "python_dependencies": None, @@ -171,7 +161,6 @@ def build(self): class MultivTableNumpy2D(MultivTable): - _tags = { "mtype": "numpy2D", "python_dependencies": None, @@ -183,19 +172,17 @@ def build(self): class MultivTableNumpy1D(MultivTable): - - _tags = { - "mtype": "numpy1D", - "python_dependencies": None, - "lossy": None, - } - - def build(self): - return None + _tags = { + "mtype": "numpy1D", + "python_dependencies": None, + "lossy": None, + } + def build(self): + return None -class MultivTableSeries(MultivTable): +class MultivTableSeries(MultivTable): _tags = { "mtype": "pd_Series_Table", "python_dependencies": None, @@ -207,7 +194,6 @@ def build(self): class MultivTableListOfDict(MultivTable): - _tags = { "mtype": "list_of_dict", "python_dependencies": None, @@ -224,7 +210,6 @@ def build(self): class MultivTablePolarsEager(MultivTable): - _tags = { "mtype": "polars_eager_table", "python_dependencies": ["polars", "pyarrow"], @@ -239,7 +224,6 @@ def build(self): class MultivTablePolarsLazy(MultivTable): - _tags = { "mtype": "polars_lazy_table", "python_dependencies": ["polars", "pyarrow"], From d8ab6c5b48a8eea152ebf95b4dcc9565e3f7003c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sun, 8 Sep 2024 10:29:27 +0100 Subject: [PATCH 07/11] linting --- skpro/datatypes/_proba/_examples.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/skpro/datatypes/_proba/_examples.py b/skpro/datatypes/_proba/_examples.py index 9e99e7b25..97d402662 100644 --- a/skpro/datatypes/_proba/_examples.py +++ b/skpro/datatypes/_proba/_examples.py @@ -98,19 +98,19 @@ class ProbaMulti(BaseExample): class ProbaMultiPredQ(ProbaMulti): - _tags = { - "mtype": "pred_quantiles", - "python_dependencies": None, - "lossy": False, - } - - def build(self): - pred_q = pd.DataFrame( - {0.2: [1, 2, 3], 0.6: [2, 3, 4], 42: [5, 3, -1], 46: [5, 3, -1]} - ) - pred_q.columns = pd.MultiIndex.from_product([["foo", "bar"], [0.2, 0.6]]) - - return pred_q + _tags = { + "mtype": "pred_quantiles", + "python_dependencies": None, + "lossy": False, + } + + def build(self): + pred_q = pd.DataFrame( + {0.2: [1, 2, 3], 0.6: [2, 3, 4], 42: [5, 3, -1], 46: [5, 3, -1]} + ) + pred_q.columns = pd.MultiIndex.from_product([["foo", "bar"], [0.2, 0.6]]) + + return pred_q class ProbaMultiPredInt(ProbaMulti): From f1cf23bac0b65ed6f8439515fa1caab7911cc94d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sun, 8 Sep 2024 12:45:17 +0100 Subject: [PATCH 08/11] linting --- skpro/datatypes/_proba/_examples.py | 14 +++++++----- skpro/datatypes/_table/_examples.py | 34 +++++++++++++++-------------- 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/skpro/datatypes/_proba/_examples.py b/skpro/datatypes/_proba/_examples.py index 97d402662..38f103852 100644 --- a/skpro/datatypes/_proba/_examples.py +++ b/skpro/datatypes/_proba/_examples.py @@ -36,7 +36,8 @@ ### # example 0: univariate -class ProbaUniv(BaseExample): + +class _ProbaUniv(BaseExample): _tags = { "scitype": "Proba", "index": 0, @@ -48,7 +49,7 @@ class ProbaUniv(BaseExample): } -class ProbaUnivPredQ(ProbaUniv): +class _ProbaUnivPredQ(_ProbaUniv): _tags = { "mtype": "pred_quantiles", "python_dependencies": None, @@ -62,7 +63,7 @@ def build(self): return pred_q -class ProbaUnivPredInt(ProbaUniv): +class _ProbaUnivPredInt(_ProbaUniv): _tags = { "mtype": "pred_interval", "python_dependencies": None, @@ -85,7 +86,8 @@ def build(self): ### # example 1: multi -class ProbaMulti(BaseExample): + +class _ProbaMulti(BaseExample): _tags = { "scitype": "Proba", "index": 1, @@ -97,7 +99,7 @@ class ProbaMulti(BaseExample): } -class ProbaMultiPredQ(ProbaMulti): +class _ProbaMultiPredQ(_ProbaMulti): _tags = { "mtype": "pred_quantiles", "python_dependencies": None, @@ -113,7 +115,7 @@ def build(self): return pred_q -class ProbaMultiPredInt(ProbaMulti): +class _ProbaMultiPredInt(_ProbaMulti): _tags = { "mtype": "pred_interval", "python_dependencies": None, diff --git a/skpro/datatypes/_table/_examples.py b/skpro/datatypes/_table/_examples.py index 362cd2a88..bc60a549a 100644 --- a/skpro/datatypes/_table/_examples.py +++ b/skpro/datatypes/_table/_examples.py @@ -33,7 +33,8 @@ ### # example 0: univariate -class UnivTable(BaseExample): + +class _UnivTable(BaseExample): _tags = { "scitype": "Table", "index": 0, @@ -48,7 +49,7 @@ class UnivTable(BaseExample): } -class UnivTableDf(UnivTable): +class _UnivTableDf(_UnivTable): _tags = { "mtype": "pd_DataFrame_Table", "python_dependencies": None, @@ -59,7 +60,7 @@ def build(self): return pd.DataFrame({"a": [1, 4, 0.5, -3]}) -class UnivTableNumpy2D(UnivTable): +class _UnivTableNumpy2D(_UnivTable): _tags = { "mtype": "numpy2D", "python_dependencies": None, @@ -70,7 +71,7 @@ def build(self): return np.array([[1], [4], [0.5], [-3]]) -class UnivTableNumpy1D(UnivTable): +class _UnivTableNumpy1D(_UnivTable): _tags = { "mtype": "numpy1D", "python_dependencies": None, @@ -81,7 +82,7 @@ def build(self): return np.array([1, 4, 0.5, -3]) -class UnivTableSeries(UnivTable): +class _UnivTableSeries(_UnivTable): _tags = { "mtype": "pd_Series_Table", "python_dependencies": None, @@ -92,7 +93,7 @@ def build(self): return pd.Series([1, 4, 0.5, -3]) -class UnivTableListOfDict(UnivTable): +class _UnivTableListOfDict(_UnivTable): _tags = { "mtype": "list_of_dict", "python_dependencies": None, @@ -103,7 +104,7 @@ def build(self): return [{"a": 1.0}, {"a": 4.0}, {"a": 0.5}, {"a": -3.0}] -class UnivTablePolarsEager(UnivTable): +class _UnivTablePolarsEager(_UnivTable): _tags = { "mtype": "polars_eager_table", "python_dependencies": ["polars", "pyarrow"], @@ -117,7 +118,7 @@ def build(self): return convert_pandas_to_polars_with_index(df) -class UnivTablePolarsLazy(UnivTable): +class _UnivTablePolarsLazy(_UnivTable): _tags = { "mtype": "polars_lazy_table", "python_dependencies": ["polars", "pyarrow"], @@ -134,7 +135,8 @@ def build(self): ### # example 1: multivariate -class MultivTable(BaseExample): + +class _MultivTable(BaseExample): _tags = { "scitype": "Table", "index": 1, @@ -149,7 +151,7 @@ class MultivTable(BaseExample): } -class MultivTableDf(MultivTable): +class _MultivTableDf(_MultivTable): _tags = { "mtype": "pd_DataFrame_Table", "python_dependencies": None, @@ -160,7 +162,7 @@ def build(self): return pd.DataFrame({"a": [1, 4, 0.5, -3], "b": [3, 7, 2, -3 / 7]}) -class MultivTableNumpy2D(MultivTable): +class _MultivTableNumpy2D(_MultivTable): _tags = { "mtype": "numpy2D", "python_dependencies": None, @@ -171,7 +173,7 @@ def build(self): return np.array([[1, 3], [4, 7], [0.5, 2], [-3, -3 / 7]]) -class MultivTableNumpy1D(MultivTable): +class _MultivTableNumpy1D(_MultivTable): _tags = { "mtype": "numpy1D", "python_dependencies": None, @@ -182,7 +184,7 @@ def build(self): return None -class MultivTableSeries(MultivTable): +class _MultivTableSeries(_MultivTable): _tags = { "mtype": "pd_Series_Table", "python_dependencies": None, @@ -193,7 +195,7 @@ def build(self): return None -class MultivTableListOfDict(MultivTable): +class _MultivTableListOfDict(_MultivTable): _tags = { "mtype": "list_of_dict", "python_dependencies": None, @@ -209,7 +211,7 @@ def build(self): ] -class MultivTablePolarsEager(MultivTable): +class _MultivTablePolarsEager(_MultivTable): _tags = { "mtype": "polars_eager_table", "python_dependencies": ["polars", "pyarrow"], @@ -223,7 +225,7 @@ def build(self): return convert_pandas_to_polars_with_index(df) -class MultivTablePolarsLazy(MultivTable): +class _MultivTablePolarsLazy(_MultivTable): _tags = { "mtype": "polars_lazy_table", "python_dependencies": ["polars", "pyarrow"], From 342cbef2ab98cf3e404c1f400d4a4c4ed0d1b091 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sun, 8 Sep 2024 15:38:48 +0100 Subject: [PATCH 09/11] Update _examples.py --- skpro/datatypes/_examples.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skpro/datatypes/_examples.py b/skpro/datatypes/_examples.py index 2ec9bc30a..5289c1b41 100644 --- a/skpro/datatypes/_examples.py +++ b/skpro/datatypes/_examples.py @@ -48,7 +48,7 @@ def generate_example_dicts(soft_deps="present"): k = cls() key = k._get_key() key_meta = (key[1], key[2]) - example_dict[key] = k + example_dict[key] = k.build() example_dict_lossy[key] = k.get_class_tags().get("lossy", False) example_dict_metadata[key_meta] = k.get_class_tags().get("metadata", {}) From cf6b666eb9f1d7a5519bcd7950917148a0448e5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sun, 8 Sep 2024 15:39:34 +0100 Subject: [PATCH 10/11] Update _examples.py --- skpro/datatypes/_examples.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/skpro/datatypes/_examples.py b/skpro/datatypes/_examples.py index 5289c1b41..8124f51a1 100644 --- a/skpro/datatypes/_examples.py +++ b/skpro/datatypes/_examples.py @@ -13,8 +13,6 @@ e.g., metadata such as column names are missing """ -from functools import lru_cache - from skpro.datatypes._registry import mtype_to_scitype __author__ = ["fkiraly"] @@ -24,7 +22,6 @@ ] -@lru_cache(maxsize=1) def generate_example_dicts(soft_deps="present"): """Generate example dicts using lookup.""" from skbase.utils.dependencies import _check_estimator_deps From 43e67188daa5203a0f172d9a5e992576b6574a43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sun, 8 Sep 2024 15:41:19 +0100 Subject: [PATCH 11/11] Update _examples.py --- skpro/datatypes/_examples.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/skpro/datatypes/_examples.py b/skpro/datatypes/_examples.py index 8124f51a1..6463a2fb3 100644 --- a/skpro/datatypes/_examples.py +++ b/skpro/datatypes/_examples.py @@ -13,6 +13,8 @@ e.g., metadata such as column names are missing """ +from functools import lru_cache + from skpro.datatypes._registry import mtype_to_scitype __author__ = ["fkiraly"] @@ -22,6 +24,7 @@ ] +@lru_cache(maxsize=1) def generate_example_dicts(soft_deps="present"): """Generate example dicts using lookup.""" from skbase.utils.dependencies import _check_estimator_deps @@ -45,7 +48,7 @@ def generate_example_dicts(soft_deps="present"): k = cls() key = k._get_key() key_meta = (key[1], key[2]) - example_dict[key] = k.build() + example_dict[key] = k example_dict_lossy[key] = k.get_class_tags().get("lossy", False) example_dict_metadata[key_meta] = k.get_class_tags().get("metadata", {}) @@ -96,14 +99,14 @@ def get_examples( for k in keys: if return_lossy: - fixtures[k[2]] = (example_dict.get(k), example_dict_lossy.get(k)) + fixtures[k[2]] = (example_dict.get(k).build(), example_dict_lossy.get(k)) elif return_metadata: fixtures[k[2]] = ( - example_dict.get(k), + example_dict.get(k).build(), example_dict_lossy.get(k), example_dict_metadata.get((k[1], k[2])), ) else: - fixtures[k[2]] = example_dict.get(k) + fixtures[k[2]] = example_dict.get(k).build() return fixtures