 from renku.core.dataset.request_model import ImageRequestModel
 from renku.core.dataset.tag import get_dataset_by_tag, prompt_access_token, prompt_tag_selection
 from renku.core.interface.dataset_gateway import IDatasetGateway
-from renku.core.interface.storage import IStorageFactory
 from renku.core.storage import check_external_storage, pull_paths_from_storage, track_paths_in_storage
 from renku.core.util import communication
 from renku.core.util.datetime8601 import local_now
@@ -399,7 +398,7 @@ def export_dataset(name, provider_name, tag, **kwargs):

     dataset: Optional[Dataset] = datasets_provenance.get_by_name(name, strict=True, immutable=True)

-    provider = ProviderFactory.from_name(provider_name)
+    provider = ProviderFactory.get_export_provider(provider_name=provider_name)

     selected_tag = None
     tags = datasets_provenance.get_all_tags(dataset)  # type: ignore
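
(For orientation, a condensed sketch of the export flow after this hunk, outside the diff itself. Every call is taken from the lines above; `name` and `provider_name` are the function's own parameters.)

    # Sketch: export_dataset now resolves its provider explicitly by name.
    dataset = datasets_provenance.get_by_name(name, strict=True, immutable=True)
    provider = ProviderFactory.get_export_provider(provider_name=provider_name)  # previously ProviderFactory.from_name
    tags = datasets_provenance.get_all_tags(dataset)
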
@@ -879,11 +878,7 @@ def update_dataset_custom_metadata(


 @inject.autoparams("dataset_gateway")
-def move_files(
-    dataset_gateway: IDatasetGateway,
-    files: Dict[Path, Path],
-    to_dataset_name: Optional[str] = None,
-):
+def move_files(dataset_gateway: IDatasetGateway, files: Dict[Path, Path], to_dataset_name: Optional[str] = None):
     """Move files and their metadata from one or more datasets to a target dataset.

     Args:
@@ -1222,13 +1217,11 @@ def should_include(filepath: Path) -> bool:
     return sorted(records, key=lambda r: r.date_added)


-@inject.autoparams("storage_factory")
-def pull_external_data(name: str, storage_factory: IStorageFactory, location: Optional[Path] = None) -> None:
+def pull_external_data(name: str, location: Optional[Path] = None) -> None:
     """Pull/copy data for an external storage to a dataset's data directory or a specified location.

     Args:
         name(str): Name of the dataset
-        storage_factory(IStorageFactory): Injected storage factory.
         location(Optional[Path]): A directory to copy data to (Default value = None).
     """
     datasets_provenance = DatasetsProvenance()
@@ -1256,28 +1249,25 @@ def pull_external_data(name: str, storage_factory: IStorageFactory, location: Optional[Path] = None) -> None:
         create_symlinks = False

     provider = ProviderFactory.get_pull_provider(uri=dataset.storage)
+    storage = provider.get_storage()

-    credentials = S3Credentials(provider)
-    prompt_for_credentials(credentials)
-
-    storage = storage_factory.get_storage(provider=provider, credentials=credentials)
     updated_files = []

     for file in dataset.files:
         path = Path(destination) / file.entity.path
         path.parent.mkdir(parents=True, exist_ok=True)
         # NOTE: Don't check if destination exists. ``IStorage.copy`` won't copy a file if it exists and is not modified.

-        if not file.source:
+        if not file.based_on:
             raise errors.DatasetImportError(f"Dataset file doesn't have a URI: {file.entity.path}")

         with communication.busy(f"Copying {file.entity.path}..."):
-            storage.copy(file.source, path)
+            storage.download(file.based_on.url, path)

         # NOTE: Make files read-only since we don't support pushing data to the remote storage
         os.chmod(path, 0o400)

-        if file.based_on and not file.based_on.checksum:
+        if not file.based_on.checksum:
             md5_hash = hash_file(path, hash_type="md5") or ""
             file.based_on = RemoteEntity(checksum=md5_hash, url=file.based_on.url, path=file.based_on.path)
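
(Two things change together in this hunk: the storage object now comes straight from the pull provider, with no injected factory and no credential prompt, and files are addressed via `file.based_on.url` instead of `file.source`. Since a missing `based_on` now raises `DatasetImportError` earlier in the loop, the checksum backfill can safely drop its `file.based_on and` guard. A condensed sketch using only calls that appear above:)

    # Sketch: the new pull flow; credential handling is assumed to live behind the provider.
    provider = ProviderFactory.get_pull_provider(uri=dataset.storage)
    storage = provider.get_storage()
    for file in dataset.files:
        path = Path(destination) / file.entity.path
        path.parent.mkdir(parents=True, exist_ok=True)
        if not file.based_on:
            raise errors.DatasetImportError(f"Dataset file doesn't have a URI: {file.entity.path}")
        storage.download(file.based_on.url, path)  # keyed on the remote entity's URL
        os.chmod(path, 0o400)  # read-only: pushing back to remote storage is unsupported
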
@@ -1313,20 +1303,13 @@ def read_dataset_data_location(dataset: Dataset) -> Optional[str]:
     return get_value(section="dataset-locations", key=dataset.name, config_filter=ConfigFilter.LOCAL_ONLY)


-@inject.autoparams("storage_factory")
-def mount_external_storage(
-    name: str,
-    existing: Optional[Path],
-    yes: bool,
-    storage_factory: IStorageFactory,
-) -> None:
+def mount_external_storage(name: str, existing: Optional[Path], yes: bool) -> None:
     """Mount an external storage to a dataset's data directory.

     Args:
         name(str): Name of the dataset
         existing(Optional[Path]): An existing mount point to use instead of actually mounting the external storage.
         yes(bool): Don't prompt when removing non-empty dataset's data directory.
-        storage_factory(IStorageFactory): Injected storage factory.
     """
     dataset, datadir = _get_dataset_with_external_storage(name=name)
@@ -1350,8 +1333,8 @@ def mount_external_storage(
     provider = ProviderFactory.get_mount_provider(uri=dataset.storage)
     credentials = S3Credentials(provider)
     prompt_for_credentials(credentials)
+    storage = provider.get_storage(credentials=credentials)

-    storage = storage_factory.get_storage(provider=provider, credentials=credentials)
     with communication.busy(f"Mounting {provider.uri}"):
         storage.mount(datadir)
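
(The mount path, by contrast, still prompts for credentials and passes them to the provider when building the storage. A condensed sketch of the resulting flow, using only calls from the two hunks above:)

    # Sketch: the new mount flow, with the storage built by the provider itself.
    provider = ProviderFactory.get_mount_provider(uri=dataset.storage)
    credentials = S3Credentials(provider)
    prompt_for_credentials(credentials)
    storage = provider.get_storage(credentials=credentials)  # replaces the injected IStorageFactory
    storage.mount(datadir)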