@@ -217,8 +217,9 @@ Additionally, you can inject client connection settings for [S3](https://boto3.a
217217from litdata import StreamingDataset
218218
219219storage_options = {
220- " key" : " your_access_key_id" ,
221- " secret" : " your_secret_access_key" ,
220+ "endpoint_url": "your_endpoint_url",
221+ "aws_access_key_id": "your_access_key_id",
222+ "aws_secret_access_key": "your_secret_access_key",
222223}
223224
224225dataset = StreamingDataset('s3://my-bucket/my-data', storage_options=storage_options)
@@ -263,47 +264,33 @@ for batch in val_dataloader:
263264
264265&nbsp;
265266
266- The StreamingDataset supports reading optimized datasets from common cloud providers.
267+ The StreamingDataset supports reading optimized datasets from common cloud providers.
267268
268269``` python
269270import os
270271import litdata as ld
271272
272273# Read data from AWS S3
273274aws_storage_options= {
274- " key " : os.environ[' AWS_ACCESS_KEY_ID' ],
275- " secret " : os.environ[' AWS_SECRET_ACCESS_KEY' ],
275+ "aws_access_key_id": os.environ['AWS_ACCESS_KEY_ID'],
276+ "aws_secret_access_key": os.environ['AWS_SECRET_ACCESS_KEY'],
276277}
277278dataset = ld.StreamingDataset("s3://my-bucket/my-data", storage_options=aws_storage_options)
278279
279280# Read data from GCS
280281gcp_storage_options= {
281- " token" : {
282- # dumped from cat ~/.config/gcloud/application_default_credentials.json
283- " account" : " " ,
284- " client_id" : " your_client_id" ,
285- " client_secret" : " your_client_secret" ,
286- " quota_project_id" : " your_quota_project_id" ,
287- " refresh_token" : " your_refresh_token" ,
288- " type" : " authorized_user" ,
289- " universe_domain" : " googleapis.com" ,
290- }
282+ "project": os.environ['PROJECT_ID'],
291283}
292284dataset = ld.StreamingDataset("gs://my-bucket/my-data", storage_options=gcp_storage_options)
293285
294286# Read data from Azure
295287azure_storage_options= {
296- " account_name " : " azure_account_name " ,
297- " account_key " : os.environ[' AZURE_ACCOUNT_ACCESS_KEY' ]
288+ "account_url": f"https://{os.environ['AZURE_ACCOUNT_NAME']}.blob.core.windows.net",
289+ "credential": os.environ['AZURE_ACCOUNT_ACCESS_KEY']
298290}
299291dataset = ld.StreamingDataset("azure://my-bucket/my-data", storage_options=azure_storage_options)
300292```
301293
302- - For more details on which storage options are supported, please refer to:
303- - [ AWS S3 storage options] ( https://github.com/fsspec/s3fs/blob/main/s3fs/core.py#L176 )
304- - [ GCS storage options] ( https://github.com/fsspec/gcsfs/blob/main/gcsfs/core.py#L154 )
305- - [ Azure storage options] ( https://github.com/fsspec/adlfs/blob/main/adlfs/spec.py#L124 )
306-
307294</details >
308295
309296<details >
0 commit comments