Skip to content

Write to AWS #13

@hugobowne

Description

@hugobowne

I need to be able to write to an S3 bucket.

@necaris is going to help. Thanks, Rami!

You can see the current error I get here:



---------------------------------------------------------------------------
NoCredentialsError                        Traceback (most recent call last)
<timed eval> in <module>

~/opt/anaconda3/envs/data-science-at-scale/lib/python3.8/site-packages/dask/dataframe/core.py in to_parquet(self, path, *args, **kwargs)
   3947         from .io import to_parquet
   3948 
-> 3949         return to_parquet(self, path, *args, **kwargs)
   3950 
   3951     @derived_from(pd.DataFrame)

~/opt/anaconda3/envs/data-science-at-scale/lib/python3.8/site-packages/dask/dataframe/io/parquet/core.py in to_parquet(df, path, engine, compression, write_index, append, ignore_divisions, partition_on, storage_options, write_metadata_file, compute, compute_kwargs, schema, **kwargs)
    461     # Engine-specific initialization steps to write the dataset.
    462     # Possibly create parquet metadata, and load existing stuff if appending
--> 463     meta, schema, i_offset = engine.initialize_write(
    464         df,
    465         fs,

~/opt/anaconda3/envs/data-science-at-scale/lib/python3.8/site-packages/dask/dataframe/io/parquet/arrow.py in initialize_write(df, fs, path, append, partition_on, ignore_divisions, division_info, schema, index_cols, **kwargs)
    876         if append and division_info is None:
    877             ignore_divisions = True
--> 878         fs.mkdirs(path, exist_ok=True)
    879 
    880         if append:

~/opt/anaconda3/envs/data-science-at-scale/lib/python3.8/site-packages/fsspec/spec.py in mkdirs(self, path, exist_ok)
   1016     def mkdirs(self, path, exist_ok=False):
   1017         """Alias of :ref:`FilesystemSpec.makedirs`."""
-> 1018         return self.makedirs(path, exist_ok=exist_ok)
   1019 
   1020     def listdir(self, path, detail=True, **kwargs):

~/opt/anaconda3/envs/data-science-at-scale/lib/python3.8/site-packages/s3fs/core.py in makedirs(self, path, exist_ok)
    458 
    459     def makedirs(self, path, exist_ok=False):
--> 460         self.mkdir(path, create_parents=True)
    461 
    462     async def _rmdir(self, path):

~/opt/anaconda3/envs/data-science-at-scale/lib/python3.8/site-packages/fsspec/asyn.py in wrapper(*args, **kwargs)
     98     def wrapper(*args, **kwargs):
     99         self = obj or args[0]
--> 100         return maybe_sync(func, self, *args, **kwargs)
    101 
    102     return wrapper

~/opt/anaconda3/envs/data-science-at-scale/lib/python3.8/site-packages/fsspec/asyn.py in maybe_sync(func, self, *args, **kwargs)
     78         if inspect.iscoroutinefunction(func):
     79             # run the awaitable on the loop
---> 80             return sync(loop, func, *args, **kwargs)
     81         else:
     82             # just call the blocking function

~/opt/anaconda3/envs/data-science-at-scale/lib/python3.8/site-packages/fsspec/asyn.py in sync(loop, func, callback_timeout, *args, **kwargs)
     49     if error[0]:
     50         typ, exc, tb = error[0]
---> 51         raise exc.with_traceback(tb)
     52     else:
     53         return result[0]

~/opt/anaconda3/envs/data-science-at-scale/lib/python3.8/site-packages/fsspec/asyn.py in f()
     33             if callback_timeout is not None:
     34                 future = asyncio.wait_for(future, callback_timeout)
---> 35             result[0] = await future
     36         except Exception:
     37             error[0] = sys.exc_info()

~/opt/anaconda3/envs/data-science-at-scale/lib/python3.8/site-packages/s3fs/core.py in _mkdir(self, path, acl, create_parents, **kwargs)
    444                         'LocationConstraint': region_name
    445                     }
--> 446                 await self.s3.create_bucket(**params)
    447                 self.invalidate_cache('')
    448                 self.invalidate_cache(bucket)

~/opt/anaconda3/envs/data-science-at-scale/lib/python3.8/site-packages/aiobotocore/client.py in _make_api_call(self, operation_name, api_params)
     89             http, parsed_response = event_response
     90         else:
---> 91             http, parsed_response = await self._make_request(
     92                 operation_model, request_dict, request_context)
     93 

~/opt/anaconda3/envs/data-science-at-scale/lib/python3.8/site-packages/aiobotocore/client.py in _make_request(self, operation_model, request_dict, request_context)
    110                             request_context):
    111         try:
--> 112             return await self._endpoint.make_request(operation_model,
    113                                                      request_dict)
    114         except Exception as e:

~/opt/anaconda3/envs/data-science-at-scale/lib/python3.8/site-packages/aiobotocore/endpoint.py in _send_request(self, request_dict, operation_model)
    224     async def _send_request(self, request_dict, operation_model):
    225         attempts = 1
--> 226         request = self.create_request(request_dict, operation_model)
    227         context = request_dict['context']
    228         success_response, exception = await self._get_response(

~/opt/anaconda3/envs/data-science-at-scale/lib/python3.8/site-packages/botocore/endpoint.py in create_request(self, params, operation_model)
    113                 service_id=service_id,
    114                 op_name=operation_model.name)
--> 115             self._event_emitter.emit(event_name, request=request,
    116                                      operation_name=operation_model.name)
    117         prepared_request = self.prepare_request(request)

~/opt/anaconda3/envs/data-science-at-scale/lib/python3.8/site-packages/botocore/hooks.py in emit(self, event_name, **kwargs)
    354     def emit(self, event_name, **kwargs):
    355         aliased_event_name = self._alias_event_name(event_name)
--> 356         return self._emitter.emit(aliased_event_name, **kwargs)
    357 
    358     def emit_until_response(self, event_name, **kwargs):

~/opt/anaconda3/envs/data-science-at-scale/lib/python3.8/site-packages/botocore/hooks.py in emit(self, event_name, **kwargs)
    226                  handlers.
    227         """
--> 228         return self._emit(event_name, kwargs)
    229 
    230     def emit_until_response(self, event_name, **kwargs):

~/opt/anaconda3/envs/data-science-at-scale/lib/python3.8/site-packages/botocore/hooks.py in _emit(self, event_name, kwargs, stop_on_response)
    209         for handler in handlers_to_call:
    210             logger.debug('Event %s: calling handler %s', event_name, handler)
--> 211             response = handler(**kwargs)
    212             responses.append((handler, response))
    213             if stop_on_response and response is not None:

~/opt/anaconda3/envs/data-science-at-scale/lib/python3.8/site-packages/botocore/signers.py in handler(self, operation_name, request, **kwargs)
     88         # this method is invoked to sign the request.
     89         # Don't call this method directly.
---> 90         return self.sign(operation_name, request)
     91 
     92     def sign(self, operation_name, request, region_name=None,

~/opt/anaconda3/envs/data-science-at-scale/lib/python3.8/site-packages/botocore/signers.py in sign(self, operation_name, request, region_name, signing_type, expires_in, signing_name)
    155                     raise e
    156 
--> 157             auth.add_auth(request)
    158 
    159     def _choose_signer(self, operation_name, signing_type, context):

~/opt/anaconda3/envs/data-science-at-scale/lib/python3.8/site-packages/botocore/auth.py in add_auth(self, request)
    423         self._region_name = signing_context.get(
    424             'region', self._default_region_name)
--> 425         super(S3SigV4Auth, self).add_auth(request)
    426 
    427     def _modify_request_before_signing(self, request):

~/opt/anaconda3/envs/data-science-at-scale/lib/python3.8/site-packages/botocore/auth.py in add_auth(self, request)
    355     def add_auth(self, request):
    356         if self.credentials is None:
--> 357             raise NoCredentialsError
    358         datetime_now = datetime.datetime.utcnow()
    359         request.context['timestamp'] = datetime_now.strftime(SIGV4_TIMESTAMP)

NoCredentialsError: Unable to locate credentials

distributed.client - ERROR - Failed to reconnect to scheduler after 10.00 seconds, closing client
_GatheringFuture exception was never retrieved
future: <_GatheringFuture finished exception=CancelledError()>
asyncio.exceptions.CancelledError


Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions