2020import re
2121import urllib
2222from pathlib import Path
23- from typing import TYPE_CHECKING , List , Optional , Tuple
23+ from typing import TYPE_CHECKING , List , Optional , Tuple , cast
2424
2525from renku .core import errors
2626from renku .core .dataset .providers .api import ProviderApi , ProviderCredentials , ProviderPriority
@@ -43,8 +43,11 @@ class S3Provider(ProviderApi):
4343
def __init__(self, uri: Optional[str]):
    """Initialize the provider and remember the endpoint and bucket encoded in its URI."""
    super().__init__(uri=uri)

    # NOTE: Only the first two components are kept; the path within the bucket isn't stored on the provider.
    uri_components = parse_s3_uri(uri=self.uri)

    self._endpoint: str = uri_components[0]
    self._bucket: str = uri_components[1]
4851
4952 @staticmethod
5053 def supports (uri : str ) -> bool :
@@ -100,16 +103,16 @@ def add(uri: str, destination: Path, **kwargs) -> List["DatasetAddMetadata"]:
100103 if not storage .exists (uri ):
101104 raise errors .ParameterError (f"S3 bucket '{ uri } ' doesn't exists." )
102105
103- repository = project_context .repository
106+ destination_path_in_repo = Path ( destination ). relative_to ( project_context .repository . path )
104107 hashes = storage .get_hashes (uri = uri )
105108 return [
106109 DatasetAddMetadata (
107- entity_path = Path ( destination ). relative_to ( repository . path ) / hash .path ,
110+ entity_path = destination_path_in_repo / hash .path ,
108111 url = hash .base_uri ,
109112 action = DatasetAddAction .NONE ,
110113 based_on = RemoteEntity (checksum = hash .hash if hash .hash else "" , url = hash .base_uri , path = hash .path ),
111114 source = Path (hash .full_uri ),
112- destination = Path ( destination ). relative_to ( repository . path ) ,
115+ destination = destination_path_in_repo ,
113116 gitignored = True ,
114117 )
115118 for hash in hashes
@@ -120,6 +123,11 @@ def bucket(self) -> str:
120123 """Return S3 bucket name."""
121124 return self ._bucket
122125
@property
def endpoint(self) -> str:
    """Return the endpoint of the S3 bucket."""
    s3_endpoint: str = self._endpoint
    return s3_endpoint
130+
123131 def on_create (self , dataset : "Dataset" ) -> None :
124132 """Hook to perform provider-specific actions on a newly-created dataset."""
125133 credentials = S3Credentials (provider = self )
@@ -145,15 +153,39 @@ def get_credentials_names() -> Tuple[str, ...]:
145153 """Return a tuple of the required credentials for a provider."""
146154 return "Access Key ID" , "Secret Access Key"
147155
@property
def provider(self) -> S3Provider:
    """Return the provider these credentials belong to, typed as ``S3Provider``."""
    # NOTE: ``cast`` only narrows the static type; no conversion or check happens at runtime.
    s3_provider = cast(S3Provider, self._provider)
    return s3_provider
160+
def get_credentials_section_name(self) -> str:
    """Get section name for storing credentials.

    The lower-cased endpoint of the associated provider is used as the section name.

    Returns:
        str: The provider's endpoint in lower case.
    """
    endpoint = self.provider.endpoint
    return endpoint.lower()
148167
149- def extract_bucket_and_path (uri : str ) -> Tuple [str , str ]:
150- """Extract bucket name and path within the bucket from a given URI.
151168
152- NOTE: We only support s3://<bucket-name>/<path> at the moment.
def create_renku_s3_uri(uri: str) -> str:
    """Create an S3 URI to work with Renku.

    Args:
        uri(str): An S3 URI in the form accepted by ``parse_s3_uri``.

    Returns:
        str: A URI made of the ``s3`` scheme, the bucket name, and the path; the endpoint is left out.
    """
    bucket_name, path_in_bucket = parse_s3_uri(uri=uri)[1:]

    return f"s3://{bucket_name}/{path_in_bucket}"
174+
175+
def parse_s3_uri(uri: str) -> Tuple[str, str, str]:
    """Extract endpoint, bucket name, and path within the bucket from a given URI.

    NOTE: We only support s3://<endpoint>/<bucket-name>/<path> at the moment.

    Args:
        uri(str): The S3 URI to parse.

    Returns:
        Tuple[str, str, str]: Endpoint, bucket name, and path within the bucket. The path has no leading or
            trailing slashes and is empty when the URI has nothing after the bucket name.

    Raises:
        errors.ParameterError: If the scheme isn't ``s3`` or if the endpoint or the bucket name is missing.
    """
    # NOTE: A bare ``import urllib`` doesn't load the ``parse`` submodule; import it explicitly so that this
    # function doesn't depend on another module having imported ``urllib.parse`` first.
    from urllib.parse import urlparse

    parsed_uri = urlparse(uri)

    endpoint = parsed_uri.netloc
    # Strip slashes so that a URI like ``s3://endpoint/`` is reported as having no bucket.
    path = parsed_uri.path.strip("/")

    if parsed_uri.scheme.lower() != "s3" or not endpoint or not path:
        raise errors.ParameterError(f"Invalid S3 URI: {uri}. Valid format is 's3://<endpoint>/<bucket-name>/<path>'")

    # The first path component is the bucket; whatever follows is the path within the bucket.
    bucket, _, path = path.partition("/")

    return endpoint, bucket, path.strip("/")
0 commit comments