From 263083e7a131249b8bfc8b480b748a706440926d Mon Sep 17 00:00:00 2001 From: bsfaical Date: Fri, 22 Nov 2024 16:26:33 -0300 Subject: [PATCH] chore: parse into dataframe using pandas Depending on the security configuration of the operating system, reading the dataset URL directly with pandas may fail with an SSL certificate error. When that happens, this change makes a second attempt to fetch the dataset with urlopen, using an SSL context built from certifi's CA bundle, and then parses the downloaded bytes into a DataFrame using pandas. --- src/ucimlrepo/fetch.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/ucimlrepo/fetch.py b/src/ucimlrepo/fetch.py index 947c550..0ef74bd 100644 --- a/src/ucimlrepo/fetch.py +++ b/src/ucimlrepo/fetch.py @@ -5,6 +5,7 @@ import urllib.parse import certifi import ssl +from io import BytesIO from ucimlrepo.dotdict import dotdict @@ -96,7 +97,14 @@ def fetch_ucirepo( try: df = pd.read_csv(data_url) except (urllib.error.URLError, urllib.error.HTTPError): - raise DatasetNotFoundError('Error reading data csv file for "{}" dataset (id={}).'.format(name, id)) + try: + response_data=urllib.request.urlopen(data_url, context=ssl.create_default_context(cafile=certifi.where())) + data_byte = response_data.read() + df = pd.read_csv(BytesIO(data_byte)) + except: + raise DatasetNotFoundError('Error reading data csv file for "{}" dataset (id={}).'.format(name, id)) + else: + pass if df.empty: raise DatasetNotFoundError('Error reading data csv file for "{}" dataset (id={}).'.format(name, id))