Skip to content

Commit c6e0e66

Browse files
authored
Revert "Feat: Using fsspec to download files" (#380)
This reverts commit 719bae2.
1 parent 08d6104 commit c6e0e66

File tree

16 files changed

+611
-441
lines changed

16 files changed

+611
-441
lines changed

README.md

Lines changed: 9 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -217,8 +217,9 @@ Additionally, you can inject client connection settings for [S3](https://boto3.a
217217
from litdata import StreamingDataset
218218

219219
storage_options = {
220-
"key": "your_access_key_id",
221-
"secret": "your_secret_access_key",
220+
"endpoint_url": "your_endpoint_url",
221+
"aws_access_key_id": "your_access_key_id",
222+
"aws_secret_access_key": "your_secret_access_key",
222223
}
223224

224225
dataset = StreamingDataset('s3://my-bucket/my-data', storage_options=storage_options)
@@ -263,47 +264,33 @@ for batch in val_dataloader:
263264

264265
 
265266

266-
The StreamingDataset supports reading optimized datasets from common cloud providers.
267+
The StreamingDataset supports reading optimized datasets from common cloud providers.
267268

268269
```python
269270
import os
270271
import litdata as ld
271272

272273
# Read data from AWS S3
273274
aws_storage_options={
274-
"key": os.environ['AWS_ACCESS_KEY_ID'],
275-
"secret": os.environ['AWS_SECRET_ACCESS_KEY'],
275+
"aws_access_key_id": os.environ['AWS_ACCESS_KEY_ID'],
276+
"aws_secret_access_key": os.environ['AWS_SECRET_ACCESS_KEY'],
276277
}
277278
dataset = ld.StreamingDataset("s3://my-bucket/my-data", storage_options=aws_storage_options)
278279

279280
# Read data from GCS
280281
gcp_storage_options={
281-
"token": {
282-
# dumped from cat ~/.config/gcloud/application_default_credentials.json
283-
"account": "",
284-
"client_id": "your_client_id",
285-
"client_secret": "your_client_secret",
286-
"quota_project_id": "your_quota_project_id",
287-
"refresh_token": "your_refresh_token",
288-
"type": "authorized_user",
289-
"universe_domain": "googleapis.com",
290-
}
282+
"project": os.environ['PROJECT_ID'],
291283
}
292284
dataset = ld.StreamingDataset("gs://my-bucket/my-data", storage_options=gcp_storage_options)
293285

294286
# Read data from Azure
295287
azure_storage_options={
296-
"account_name": "azure_account_name",
297-
"account_key": os.environ['AZURE_ACCOUNT_ACCESS_KEY']
288+
"account_url": f"https://{os.environ['AZURE_ACCOUNT_NAME']}.blob.core.windows.net",
289+
"credential": os.environ['AZURE_ACCOUNT_ACCESS_KEY']
298290
}
299291
dataset = ld.StreamingDataset("azure://my-bucket/my-data", storage_options=azure_storage_options)
300292
```
301293

302-
- For more details on which storage options are supported, please refer to:
303-
- [AWS S3 storage options](https://github.com/fsspec/s3fs/blob/main/s3fs/core.py#L176)
304-
- [GCS storage options](https://github.com/fsspec/gcsfs/blob/main/gcsfs/core.py#L154)
305-
- [Azure storage options](https://github.com/fsspec/adlfs/blob/main/adlfs/spec.py#L124)
306-
307294
</details>
308295

309296
<details>

requirements.txt

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,5 @@ torch
22
lightning-utilities
33
filelock
44
numpy
5-
# boto3
5+
boto3
66
requests
7-
fsspec
8-
fsspec[s3] # aws s3

requirements/extras.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,3 @@ pyarrow
55
tqdm
66
lightning-sdk ==0.1.17 # Must be pinned to ensure compatibility
77
google-cloud-storage
8-
fsspec[gs] # google cloud storage
9-
fsspec[abfs] # azure blob

src/litdata/constants.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,4 +85,3 @@
8585
_TIME_FORMAT = "%Y-%m-%d_%H-%M-%S.%fZ"
8686
_IS_IN_STUDIO = bool(os.getenv("LIGHTNING_CLOUD_PROJECT_ID", None)) and bool(os.getenv("LIGHTNING_CLUSTER_ID", None))
8787
_ENABLE_STATUS = bool(int(os.getenv("ENABLE_STATUS_REPORT", "0")))
88-
_SUPPORTED_CLOUD_PROVIDERS = ["s3", "gs", "azure", "abfs"]

0 commit comments

Comments
 (0)