Skip to content

Commit

Permalink
Merge pull request #17 from ispras/issue14
Browse files Browse the repository at this point in the history
#14: update ptbxl downloading
  • Loading branch information
AvetisyanAram authored Jul 24, 2024
2 parents 21657c5 + b498768 commit 94febd6
Showing 1 changed file with 13 additions and 5 deletions.
18 changes: 13 additions & 5 deletions src/ecglib/data/load_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,21 @@ def load_ptb_xl(
path_to_zip: str = "./",
path_to_unzip: str = "./",
delete_zip: bool = True,
frequency: int = 500,
) -> pd.DataFrame:
"""
Load PTB-XL dataset
:param download: whether to download PTB-XL from Physionet
:param path_to_zip: path where to store PTB-XL .zip file
:param path_to_unzip: path where to unarchive PTB-XL .zip file
:param delete_zip: whether to delete PTB-XL .zip file after unarchiving
:param frequency: sampling frequency of the signals that the `fpath` column points to; only 100 and 500 are supported
:return: dataframe with PTB-XL dataset info
"""

if download:
url = "https://physionet.org/static/published-projects/ptb-xl/ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.2.zip"
url = "https://physionet.org/static/published-projects/ptb-xl/ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.3.zip"
ptb_xl_zip = os.path.join(path_to_zip, "ptb_xl.zip")
response = requests.get(url, stream=True)
total_size_in_bytes = int(response.headers.get("content-length", 0))
Expand Down Expand Up @@ -59,20 +61,26 @@ def load_ptb_xl(
os.remove(ptb_xl_zip)
print("Deleting completed!")

if frequency == 500:
suffix = 'hr' # high rate
else:
assert frequency == 100, f"PTB-XL signals are only supported with 100 or 500 sample frequency, received: {frequency}"
suffix = 'lr' # low rate

ptb_xl_info = pd.read_csv(
os.path.join(
path_to_unzip,
"ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.2",
"ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.3",
"ptbxl_database.csv",
)
)
ptb_xl_info["fpath"] = [
os.path.join(
path_to_unzip,
"ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.2",
ptb_xl_info.iloc[i]["filename_hr"],
"ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.3",
ptb_xl_info.iloc[i][f"filename_{suffix}"],
)
for i in range(len(ptb_xl_info["filename_hr"]))
for i in range(len(ptb_xl_info[f"filename_{suffix}"]))
]

return ptb_xl_info
Expand Down

0 comments on commit 94febd6

Please sign in to comment.