diff --git a/download_data.py b/download_data.py index 4a67f3e..4877b37 100644 --- a/download_data.py +++ b/download_data.py @@ -4,6 +4,7 @@ public/private data as `tar.gz` archives in dedicated OSF folders named after the challenge. """ +import shutil import tarfile import argparse from zlib import adler32 @@ -124,6 +125,7 @@ def setup_data(data_path, private=False, username=None, password=None): archive = download_split_archive_from_osf( public_folder, split_files, data_path ) + print("Extracting the data...", end='', flush=True) with tarfile.open(archive) as tar: tar.extractall(data_path) @@ -133,7 +135,8 @@ def setup_data(data_path, private=False, username=None, password=None): for f in (data_path / "public").glob("*")] (data_path / "public").rmdir() archive.unlink() - (data_path / "validation.h5").symlink_to(data_path / "test.h5") + shutil.copyfile(data_path / "test.h5", data_path / "validation.h5") + print("Done.") if private: private_folder = get_folder(PRIVATE_PROJECT, username, password) diff --git a/submissions/starting_kit/estimator.py b/submissions/starting_kit/estimator.py index d9a3c4b..c3d35bd 100644 --- a/submissions/starting_kit/estimator.py +++ b/submissions/starting_kit/estimator.py @@ -1,11 +1,8 @@ -from sklearn import set_config from sklearn.pipeline import make_pipeline from sklearn.compose import make_column_transformer from sklearn.ensemble import RandomForestRegressor from sklearn.preprocessing import OrdinalEncoder -set_config(transform_output="pandas") - class IgnoreDomain(RandomForestRegressor): def fit(self, X, y):