scikit-learn-contrib
diff --git a/‎HISTORY.rst‎
Lines changed: 6 additions & 1 deletion b/‎HISTORY.rst‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎README.rst‎
Lines changed: 4 additions & 4 deletions b/‎README.rst‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎docs/images/readme_tabular_comparison.png‎
-115 KB b/‎docs/images/readme_tabular_comparison.png‎
-115 KB
diff --git a/‎examples/tutorials/plot_tuto_benchmark_TS.py‎
Lines changed: 0 additions & 1 deletion b/‎examples/tutorials/plot_tuto_benchmark_TS.py‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎examples/tutorials/plot_tuto_categorical.py‎
Lines changed: 0 additions & 1 deletion b/‎examples/tutorials/plot_tuto_categorical.py‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎examples/tutorials/plot_tuto_diffusion_models.py‎
Lines changed: 0 additions & 2 deletions b/‎examples/tutorials/plot_tuto_diffusion_models.py‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎examples/tutorials/plot_tuto_mean_median.py‎
Lines changed: 0 additions & 1 deletion b/‎examples/tutorials/plot_tuto_mean_median.py‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎pyproject.toml‎
Lines changed: 1 addition & 0 deletions b/‎pyproject.toml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎qolmat/benchmark/comparator.py‎
Lines changed: 0 additions & 5 deletions b/‎qolmat/benchmark/comparator.py‎
Lines changed: 0 additions & 5 deletions
diff --git a/‎qolmat/benchmark/metrics.py‎
Lines changed: 11 additions & 2 deletions b/‎qolmat/benchmark/metrics.py‎
Lines changed: 11 additions & 2 deletions
@@ -2,7 +2,12 @@
 History
 =======
 
-0.1.8 (2024-08-29)
+0.1.10 (2024-??-??)
+-------------------
+* Long EM and RPCA operations wrapped with tqdm progress bars
+* Readme code sample updated, and results table made consistent
+
+0.1.9 (2024-08-29)
 ------------------
 * Tutorials reproducibility improved with random_state parameters
 * RPCA now accepts random_state parameters
 
@@ -70,26 +70,26 @@ With just these few lines of code, you can see how easy it is to
   from qolmat.utils import data
 
   # load and prepare csv data
+
   df_data = data.get_data("Beijing")
   columns = ["TEMP", "PRES", "WSPM"]
   df_data = df_data[columns]
   df_with_nan = data.add_holes(df_data, ratio_masked=0.2, mean_size=120)
 
   # impute and compare
-  imputer_mean = imputers.ImputerSimple(strategy="mean", groups=("station",))
+  imputer_median = imputers.ImputerSimple(groups=("station",))
   imputer_interpol = imputers.ImputerInterpolation(method="linear", groups=("station",))
   imputer_var1 = imputers.ImputerEM(model="VAR", groups=("station",), method="mle", max_iter_em=50, n_iter_ou=15, dt=1e-3, p=1)
   dict_imputers = {
-        "mean": imputer_mean,
+        "median": imputer_median,
         "interpolation": imputer_interpol,
         "VAR(1) process": imputer_var1
     }
   generator_holes = missing_patterns.EmpiricalHoleGenerator(n_splits=4, ratio_masked=0.1)
   comparison = comparator.Comparator(
         dict_imputers,
-        columns,
         generator_holes = generator_holes,
-        metrics = ["mae", "wmape", "kl_columnwise", "ks_test", "energy"],
+        metrics = ["mae", "wmape", "kl_columnwise", "frechet"],
     )
   results = comparison.compare(df_with_nan)
   results.style.highlight_min(color="lightsteelblue", axis=1)
 
@@ -128,7 +128,6 @@
 
 comparison = comparator.Comparator(
     dict_imputers,
-    cols_to_impute,
     generator_holes=generator_holes,
     metrics=["mae", "wmape", "kl_columnwise", "wasserstein_columnwise"],
     max_evals=10,
 
@@ -89,7 +89,6 @@
 
 comparison = comparator.Comparator(
     dict_imputers,
-    cols_to_impute,
     generator_holes=generator_holes,
     metrics=metrics,
     max_evals=2,
 
@@ -169,7 +169,6 @@
 
 comparison = comparator.Comparator(
     dict_imputers,
-    selected_columns=df_data.columns,
     generator_holes=missing_patterns.UniformHoleGenerator(n_splits=2, random_state=rng),
     metrics=["mae", "kl_columnwise"],
 )
@@ -224,7 +223,6 @@
 
 comparison = comparator.Comparator(
     dict_imputers,
-    selected_columns=df_data.columns,
     generator_holes=missing_patterns.UniformHoleGenerator(n_splits=2, random_state=rng),
     metrics=["mae", "kl_columnwise"],
 )
 
@@ -123,7 +123,6 @@
 
 comparison = comparator.Comparator(
     dict_imputers,
-    cols_to_impute,
     generator_holes=generator_holes,
     metrics=metrics,
     max_evals=5,
 
@@ -45,6 +45,7 @@ statsmodels = ">= 0.14.0"
 typed-ast = { version = "*", optional = true }
 category-encoders = "^2.6.3"
 dcor = ">= 0.6"
+tqdm = "*"
 
 [tool.poetry.group.torch.dependencies]
 torch = "< 2.5"
 
@@ -28,9 +28,6 @@ class Comparator:
     ----------
     dict_models: Dict[str, any]
         dictionary of imputation methods
-    selected_columns: List[str]Œ
-        list of column's names selected (all with at least one null value will
-        be imputed)
     columnwise_evaluation : Optional[bool], optional
         whether the metric should be calculated column-wise or not,
         by default False
@@ -46,7 +43,6 @@ class Comparator:
     def __init__(
         self,
         dict_models: Dict[str, Any],
-        selected_columns: List[str],
         generator_holes: _HoleGenerator,
         metrics: List = ["mae", "wmape", "kl_columnwise"],
         dict_config_opti: Optional[Dict[str, Any]] = {},
@@ -55,7 +51,6 @@ def __init__(
         verbose: bool = False,
     ):
         self.dict_imputers = dict_models
-        self.selected_columns = selected_columns
         self.generator_holes = generator_holes
         self.metrics = metrics
         self.dict_config_opti = dict_config_opti
 
@@ -835,6 +835,7 @@ def sum_pairwise_distances(
 def frechet_distance_base(
     df1: pd.DataFrame,
     df2: pd.DataFrame,
+    df_mask: pd.DataFrame,
 ) -> pd.Series:
     """Compute the Fréchet distance between two dataframes df1 and df2.
 
@@ -853,16 +854,24 @@ def frechet_distance_base(
         true dataframe
     df2 : pd.DataFrame
         predicted dataframe
+    df_mask : pd.DataFrame
+        Boolean mask, True for the elements of the dataframes to compute on
 
     Returns
     -------
     pd.Series
         Frechet distance in a Series object
 
     """
-    if df1.shape != df2.shape:
+    if df1.shape != df2.shape or df1.shape != df_mask.shape:
         raise Exception("inputs have to be of same dimensions.")
 
+    df1 = df1.copy()
+    df2 = df2.copy()
+    # Set to nan the values not in the mask
+    df1[~df_mask] = np.nan
+    df2[~df_mask] = np.nan
+
     std = (np.std(df1) + np.std(df2) + EPS) / 2
     mu = (np.nanmean(df1, axis=0) + np.nanmean(df2, axis=0)) / 2
     df1 = (df1 - mu) / std
@@ -911,7 +920,7 @@ def frechet_distance(
 
     """
     if method == "single":
-        return frechet_distance_base(df1, df2)
+        return frechet_distance_base(df1, df2, df_mask)
     return pattern_based_weighted_mean_metric(
         df1,
         df2,
Original file line number	Diff line number	Diff line change
`@@ -169,7 +169,6 @@`
`169`	`169`
`170`	`170`	`comparison = comparator.Comparator(`
`171`	`171`	`dict_imputers,`
`172`		`- selected_columns=df_data.columns,`
`173`	`172`	`generator_holes=missing_patterns.UniformHoleGenerator(n_splits=2, random_state=rng),`
`174`	`173`	`metrics=["mae", "kl_columnwise"],`
`175`	`174`	`)`
`@@ -224,7 +223,6 @@`
`224`	`223`
`225`	`224`	`comparison = comparator.Comparator(`
`226`	`225`	`dict_imputers,`
`227`		`- selected_columns=df_data.columns,`
`228`	`226`	`generator_holes=missing_patterns.UniformHoleGenerator(n_splits=2, random_state=rng),`
`229`	`227`	`metrics=["mae", "kl_columnwise"],`
`230`	`228`	`)`