Skip to content

Commit 6383835

Browse files
Merge pull request #176 from scikit-learn-contrib/chore/readme_fix
Chore/readme fix
2 parents d31223e + a63d6fc commit 6383835

17 files changed

+145
-99
lines changed

HISTORY.rst

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,12 @@
22
History
33
=======
44

5-
0.1.8 (2024-08-29)
5+
0.1.10 (2024-??-??)
6+
-------------------
7+
* Long EM and RPCA operations wrapped with tqdm progress bars
8+
* Readme code sample updated, and results table made consistent
9+
10+
0.1.9 (2024-08-29)
611
------------------
712
* Tutorials reproducibility improved with random_state parameters
813
* RPCA now accepts random_state parameters

README.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,26 +70,26 @@ With just these few lines of code, you can see how easy it is to
7070
from qolmat.utils import data
7171
7272
# load and prepare csv data
73+
7374
df_data = data.get_data("Beijing")
7475
columns = ["TEMP", "PRES", "WSPM"]
7576
df_data = df_data[columns]
7677
df_with_nan = data.add_holes(df_data, ratio_masked=0.2, mean_size=120)
7778
7879
# impute and compare
79-
imputer_mean = imputers.ImputerSimple(strategy="mean", groups=("station",))
80+
imputer_median = imputers.ImputerSimple(groups=("station",))
8081
imputer_interpol = imputers.ImputerInterpolation(method="linear", groups=("station",))
8182
imputer_var1 = imputers.ImputerEM(model="VAR", groups=("station",), method="mle", max_iter_em=50, n_iter_ou=15, dt=1e-3, p=1)
8283
dict_imputers = {
83-
"mean": imputer_mean,
84+
"median": imputer_median,
8485
"interpolation": imputer_interpol,
8586
"VAR(1) process": imputer_var1
8687
}
8788
generator_holes = missing_patterns.EmpiricalHoleGenerator(n_splits=4, ratio_masked=0.1)
8889
comparison = comparator.Comparator(
8990
dict_imputers,
90-
columns,
9191
generator_holes = generator_holes,
92-
metrics = ["mae", "wmape", "kl_columnwise", "ks_test", "energy"],
92+
metrics = ["mae", "wmape", "kl_columnwise", "frechet"],
9393
)
9494
results = comparison.compare(df_with_nan)
9595
results.style.highlight_min(color="lightsteelblue", axis=1)
-115 KB
Loading

examples/tutorials/plot_tuto_benchmark_TS.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,6 @@
128128

129129
comparison = comparator.Comparator(
130130
dict_imputers,
131-
cols_to_impute,
132131
generator_holes=generator_holes,
133132
metrics=["mae", "wmape", "kl_columnwise", "wasserstein_columnwise"],
134133
max_evals=10,

examples/tutorials/plot_tuto_categorical.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,6 @@
8989

9090
comparison = comparator.Comparator(
9191
dict_imputers,
92-
cols_to_impute,
9392
generator_holes=generator_holes,
9493
metrics=metrics,
9594
max_evals=2,

examples/tutorials/plot_tuto_diffusion_models.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,6 @@
169169

170170
comparison = comparator.Comparator(
171171
dict_imputers,
172-
selected_columns=df_data.columns,
173172
generator_holes=missing_patterns.UniformHoleGenerator(n_splits=2, random_state=rng),
174173
metrics=["mae", "kl_columnwise"],
175174
)
@@ -224,7 +223,6 @@
224223

225224
comparison = comparator.Comparator(
226225
dict_imputers,
227-
selected_columns=df_data.columns,
228226
generator_holes=missing_patterns.UniformHoleGenerator(n_splits=2, random_state=rng),
229227
metrics=["mae", "kl_columnwise"],
230228
)

examples/tutorials/plot_tuto_mean_median.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,6 @@
123123

124124
comparison = comparator.Comparator(
125125
dict_imputers,
126-
cols_to_impute,
127126
generator_holes=generator_holes,
128127
metrics=metrics,
129128
max_evals=5,

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ statsmodels = ">= 0.14.0"
4545
typed-ast = { version = "*", optional = true }
4646
category-encoders = "^2.6.3"
4747
dcor = ">= 0.6"
48+
tqdm = "*"
4849

4950
[tool.poetry.group.torch.dependencies]
5051
torch = "< 2.5"

qolmat/benchmark/comparator.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,6 @@ class Comparator:
2828
----------
2929
dict_models: Dict[str, Any]
3030
dictionary of imputation methods
31-
selected_columns: List[str]Œ
32-
list of column's names selected (all with at least one null value will
33-
be imputed)
3431
columnwise_evaluation : Optional[bool], optional
3532
whether the metric should be calculated column-wise or not,
3633
by default False
@@ -46,7 +43,6 @@ class Comparator:
4643
def __init__(
4744
self,
4845
dict_models: Dict[str, Any],
49-
selected_columns: List[str],
5046
generator_holes: _HoleGenerator,
5147
metrics: List = ["mae", "wmape", "kl_columnwise"],
5248
dict_config_opti: Optional[Dict[str, Any]] = {},
@@ -55,7 +51,6 @@ def __init__(
5551
verbose: bool = False,
5652
):
5753
self.dict_imputers = dict_models
58-
self.selected_columns = selected_columns
5954
self.generator_holes = generator_holes
6055
self.metrics = metrics
6156
self.dict_config_opti = dict_config_opti

qolmat/benchmark/metrics.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -835,6 +835,7 @@ def sum_pairwise_distances(
835835
def frechet_distance_base(
836836
df1: pd.DataFrame,
837837
df2: pd.DataFrame,
838+
df_mask: pd.DataFrame,
838839
) -> pd.Series:
839840
"""Compute the Fréchet distance between two dataframes df1 and df2.
840841
@@ -853,16 +854,24 @@ def frechet_distance_base(
853854
true dataframe
854855
df2 : pd.DataFrame
855856
predicted dataframe
857+
df_mask : pd.DataFrame
858+
Elements of the dataframes to compute on
856859
857860
Returns
858861
-------
859862
pd.Series
860863
Frechet distance in a Series object
861864
862865
"""
863-
if df1.shape != df2.shape:
866+
if df1.shape != df2.shape or df1.shape != df_mask.shape:
864867
raise Exception("inputs have to be of same dimensions.")
865868

869+
df1 = df1.copy()
870+
df2 = df2.copy()
871+
# Set to nan the values not in the mask
872+
df1[~df_mask] = np.nan
873+
df2[~df_mask] = np.nan
874+
866875
std = (np.std(df1) + np.std(df2) + EPS) / 2
867876
mu = (np.nanmean(df1, axis=0) + np.nanmean(df2, axis=0)) / 2
868877
df1 = (df1 - mu) / std
@@ -911,7 +920,7 @@ def frechet_distance(
911920
912921
"""
913922
if method == "single":
914-
return frechet_distance_base(df1, df2)
923+
return frechet_distance_base(df1, df2, df_mask)
915924
return pattern_based_weighted_mean_metric(
916925
df1,
917926
df2,

0 commit comments

Comments
 (0)