
Commit 44805a2

Merge pull request #177 from scikit-learn-contrib/chore/readme_fix
Chore/readme fix
2 parents (4491c7a + 53b96a3), commit 44805a2

13 files changed: +101 additions, -95 deletions

HISTORY.rst

Lines changed: 1 addition & 0 deletions
@@ -5,6 +5,7 @@ History
 0.1.10 (2024-??-??)
 ------------------
 * Long EM and RPCA operations wrapped with tqdm progress bars
+* Readme code sample updated, and results table made consistent

 0.1.9 (2024-08-29)
 ------------------

README.rst

Lines changed: 1 addition & 2 deletions
@@ -88,9 +88,8 @@ With just these few lines of code, you can see how easy it is to
 generator_holes = missing_patterns.EmpiricalHoleGenerator(n_splits=4, ratio_masked=0.1)
 comparison = comparator.Comparator(
     dict_imputers,
-    columns,
     generator_holes = generator_holes,
-    metrics = ["mae", "wmape", "kl_columnwise", "ks_test", "energy"],
+    metrics = ["mae", "wmape", "kl_columnwise", "frechet"],
 )
 results = comparison.compare(df_with_nan)
 results.style.highlight_min(color="lightsteelblue", axis=1)
(binary image file changed, -115 KB)
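For readers who want to try the updated README snippet end to end, here is a minimal sketch under the new Comparator signature. The toy dataframe, the random masking, and the choice of ImputerMean / ImputerMedian from qolmat.imputations.imputers are illustrative assumptions, not part of this commit; only the hole generator, Comparator(...) and compare(...) lines mirror the README.

import numpy as np
import pandas as pd

from qolmat.benchmark import comparator, missing_patterns
from qolmat.imputations import imputers

# Toy dataframe with artificially introduced missing values (illustrative only).
rng = np.random.default_rng(42)
df_with_nan = pd.DataFrame(rng.normal(size=(200, 3)), columns=["a", "b", "c"])
df_with_nan[df_with_nan > 1.5] = np.nan

# Any mapping of names to Qolmat imputers works here; two simple ones as examples.
dict_imputers = {
    "mean": imputers.ImputerMean(),
    "median": imputers.ImputerMedian(),
}

generator_holes = missing_patterns.EmpiricalHoleGenerator(n_splits=4, ratio_masked=0.1)
comparison = comparator.Comparator(
    dict_imputers,
    generator_holes=generator_holes,
    metrics=["mae", "wmape", "kl_columnwise", "frechet"],
)
results = comparison.compare(df_with_nan)
results.style.highlight_min(color="lightsteelblue", axis=1)

Note that no column list is passed to Comparator any more; that is exactly the argument removed throughout this commit.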

examples/tutorials/plot_tuto_benchmark_TS.py

Lines changed: 0 additions & 1 deletion
@@ -128,7 +128,6 @@

 comparison = comparator.Comparator(
     dict_imputers,
-    cols_to_impute,
     generator_holes=generator_holes,
     metrics=["mae", "wmape", "kl_columnwise", "wasserstein_columnwise"],
     max_evals=10,

examples/tutorials/plot_tuto_categorical.py

Lines changed: 0 additions & 1 deletion
@@ -89,7 +89,6 @@

 comparison = comparator.Comparator(
     dict_imputers,
-    cols_to_impute,
     generator_holes=generator_holes,
     metrics=metrics,
     max_evals=2,

examples/tutorials/plot_tuto_diffusion_models.py

Lines changed: 0 additions & 2 deletions
@@ -169,7 +169,6 @@

 comparison = comparator.Comparator(
     dict_imputers,
-    selected_columns=df_data.columns,
     generator_holes=missing_patterns.UniformHoleGenerator(n_splits=2, random_state=rng),
     metrics=["mae", "kl_columnwise"],
 )
@@ -224,7 +223,6 @@

 comparison = comparator.Comparator(
     dict_imputers,
-    selected_columns=df_data.columns,
     generator_holes=missing_patterns.UniformHoleGenerator(n_splits=2, random_state=rng),
     metrics=["mae", "kl_columnwise"],
 )

examples/tutorials/plot_tuto_mean_median.py

Lines changed: 0 additions & 1 deletion
@@ -123,7 +123,6 @@

 comparison = comparator.Comparator(
     dict_imputers,
-    cols_to_impute,
     generator_holes=generator_holes,
     metrics=metrics,
     max_evals=5,

qolmat/benchmark/comparator.py

Lines changed: 0 additions & 5 deletions
@@ -28,9 +28,6 @@ class Comparator:
     ----------
     dict_models: Dict[str, any]
         dictionary of imputation methods
-    selected_columns: List[str]
-        list of column's names selected (all with at least one null value will
-        be imputed)
     columnwise_evaluation : Optional[bool], optional
         whether the metric should be calculated column-wise or not,
         by default False
@@ -46,7 +43,6 @@ class Comparator:
     def __init__(
         self,
         dict_models: Dict[str, Any],
-        selected_columns: List[str],
         generator_holes: _HoleGenerator,
         metrics: List = ["mae", "wmape", "kl_columnwise"],
         dict_config_opti: Optional[Dict[str, Any]] = {},
@@ -55,7 +51,6 @@ def __init__(
         verbose: bool = False,
     ):
         self.dict_imputers = dict_models
-        self.selected_columns = selected_columns
         self.generator_holes = generator_holes
         self.metrics = metrics
         self.dict_config_opti = dict_config_opti
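Because selected_columns used to be the second positional parameter, any call site that still passes a column list will now bind it to generator_holes and raise a TypeError. A minimal migration sketch, with an assumed imputer and hole-generator setup (the names mirror the tutorial diffs above; the concrete choices are illustrative):

from qolmat.benchmark import comparator, missing_patterns
from qolmat.imputations import imputers

# Illustrative setup; the imputer choice and generator settings are assumptions.
dict_imputers = {"median": imputers.ImputerMedian()}
generator_holes = missing_patterns.EmpiricalHoleGenerator(n_splits=2, ratio_masked=0.1)

# Before this commit, a column list came second:
#     comparator.Comparator(dict_imputers, cols_to_impute, generator_holes=..., metrics=...)
# After this commit, the list is simply dropped; the remaining arguments are unchanged.
comparison = comparator.Comparator(
    dict_imputers,
    generator_holes=generator_holes,
    metrics=["mae", "wmape", "kl_columnwise"],
)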

qolmat/benchmark/metrics.py

Lines changed: 11 additions & 2 deletions
@@ -835,6 +835,7 @@ def sum_pairwise_distances(
 def frechet_distance_base(
     df1: pd.DataFrame,
     df2: pd.DataFrame,
+    df_mask: pd.DataFrame,
 ) -> pd.Series:
     """Compute the Fréchet distance between two dataframes df1 and df2.

@@ -853,16 +854,24 @@ def frechet_distance_base(
         true dataframe
     df2 : pd.DataFrame
         predicted dataframe
+    df_mask : pd.DataFrame
+        Elements of the dataframes to compute on

     Returns
     -------
     pd.Series
         Frechet distance in a Series object

     """
-    if df1.shape != df2.shape:
+    if df1.shape != df2.shape or df1.shape != df_mask.shape:
         raise Exception("inputs have to be of same dimensions.")

+    df1 = df1.copy()
+    df2 = df2.copy()
+    # Set to nan the values not in the mask
+    df1[~df_mask] = np.nan
+    df2[~df_mask] = np.nan
+
     std = (np.std(df1) + np.std(df2) + EPS) / 2
     mu = (np.nanmean(df1, axis=0) + np.nanmean(df2, axis=0)) / 2
     df1 = (df1 - mu) / std
@@ -911,7 +920,7 @@ def frechet_distance(

     """
     if method == "single":
-        return frechet_distance_base(df1, df2)
+        return frechet_distance_base(df1, df2, df_mask)
     return pattern_based_weighted_mean_metric(
         df1,
         df2,
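The functional change above is that frechet_distance_base now only scores the cells selected by df_mask: masked-out entries are set to NaN, so the nan-aware statistics further down ignore them. A small self-contained sketch of that masking step, on made-up toy frames (not part of the commit):

import numpy as np
import pandas as pd

# Two toy frames and a boolean mask selecting the cells to evaluate.
df1 = pd.DataFrame({"a": [1.0, 2.0, 3.0], "b": [4.0, 5.0, 6.0]})
df2 = pd.DataFrame({"a": [1.1, 2.1, 2.9], "b": [3.8, 5.2, 6.1]})
df_mask = pd.DataFrame({"a": [True, True, False], "b": [True, False, True]})

# The step added in frechet_distance_base: blank out cells outside the mask.
df1 = df1.copy()
df2 = df2.copy()
df1[~df_mask] = np.nan
df2[~df_mask] = np.nan

# Only masked-in cells contribute to the column statistics.
print(np.nanmean(df1, axis=0))  # [1.5, 5.0]: "a" averages rows 0-1, "b" averages rows 0 and 2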

qolmat/imputations/imputers_pytorch.py

Lines changed: 16 additions & 17 deletions
@@ -8,6 +8,7 @@
 import pandas as pd
 from numpy.typing import NDArray
 from sklearn.preprocessing import StandardScaler
+from tqdm import tqdm

 # from typing_extensions import Self
 from qolmat.benchmark import metrics
@@ -106,23 +107,21 @@ def _fit_estimator(
         optimizer = optim.Adam(estimator.parameters(), lr=self.learning_rate)
         loss_fn = self.loss_fn

-        for epoch in range(self.epochs):
-            estimator.train()
-            optimizer.zero_grad()
-
-            input_data = torch.Tensor(X.values)
-            target_data = torch.Tensor(y.values)
-            target_data = target_data.unsqueeze(1)
-            outputs = estimator(input_data)
-            loss = loss_fn(outputs, target_data)
-
-            loss.backward()
-            optimizer.step()
-            if (epoch + 1) % 10 == 0:
-                logging.info(
-                    f"Epoch [{epoch + 1}/{self.epochs}], "
-                    f"Loss: {loss.item():.4f}"
-                )
+        with tqdm(total=self.epochs, desc="Training", unit="epoch") as pbar:
+            for _ in range(self.epochs):
+                estimator.train()
+                optimizer.zero_grad()
+
+                input_data = torch.Tensor(X.values)
+                target_data = torch.Tensor(y.values)
+                target_data = target_data.unsqueeze(1)
+                outputs = estimator(input_data)
+                loss = loss_fn(outputs, target_data)
+
+                loss.backward()
+                optimizer.step()
+                pbar.set_postfix(loss=f"{loss.item():.4f}")
+                pbar.update(1)
         return estimator

     def _predict_estimator(
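The training loop now reports progress through a single tqdm bar rather than logging every tenth epoch, with the running loss shown via set_postfix. A framework-free sketch of the same pattern; the sleep call and the decaying loss value merely stand in for the real forward/backward pass:

from time import sleep

from tqdm import tqdm

epochs = 50
loss = 1.0
with tqdm(total=epochs, desc="Training", unit="epoch") as pbar:
    for _ in range(epochs):
        sleep(0.01)      # stand-in for the forward/backward pass
        loss *= 0.95     # stand-in for an improving training loss
        pbar.set_postfix(loss=f"{loss:.4f}")  # show the live loss next to the bar
        pbar.update(1)   # advance the bar by one epoch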
