Skip to content

Commit

Permalink
add utils function to download HIV data and fix the tutorial 2 bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
kexinhuang12345 committed Sep 23, 2020
1 parent 8b62f6f commit 0dc3c37
Show file tree
Hide file tree
Showing 3 changed files with 152 additions and 77 deletions.
31 changes: 30 additions & 1 deletion DeepPurpose/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import numpy as np
import wget
from zipfile import ZipFile
from DeepPurpose.utils import convert_y_unit
from DeepPurpose.utils import *
import json
import os

Expand Down Expand Up @@ -393,6 +393,35 @@ def load_AID1706_SARS_CoV_3CL(path = './data', binary = True, threshold = 15, ba
print('Done!')
return np.array(X_drug), target, np.array(y)

def load_HIV(path = './data'):
    """Load the HIV dataset as (SMILES, labels, positional indices).

    Ensures ``hiv.csv`` is available under ``path`` via ``download_unzip``,
    then reads it and filters it.

    Args:
        path: directory that holds (or will receive) ``hiv.csv``.

    Returns:
        A tuple ``(drugs, y, drugs_idx)``: the ``smiles`` column values, the
        ``HIV_active`` column values, and an array of positions
        ``0 .. len(drugs) - 1``.
    """
    download_unzip('HIV', path, 'hiv.csv')

    csv_file = os.path.join(path, 'hiv.csv')
    table = pd.read_csv(csv_file)

    # keep = False discards every row whose SMILES occurs more than once,
    # not just the later copies.
    unique_positions = table['smiles'].drop_duplicates(keep = False).index.values
    table = table.iloc[unique_positions]

    # Rows without a label are removed and the index is renumbered from 0.
    has_label = table["HIV_active"].notnull()
    labelled = table[has_label].reset_index(drop = True)

    drugs = labelled["smiles"].values
    y = labelled["HIV_active"].values
    drugs_idx = np.array(list(range(len(drugs))))

    return drugs, y, drugs_idx

def load_AqSolDB(path = './data'):
    """Load the AqSolDB solubility dataset as (SMILES, solubility, names).

    If ``curated-solubility-dataset.csv`` is not already present under
    ``path``, it is downloaded there first.

    Args:
        path: directory that holds (or will receive) the CSV file; created,
            including missing parents, when a download is needed.

    Returns:
        A tuple ``(drugs, y, drugs_idx)``: the ``SMILES`` column values, the
        ``Solubility`` column values, and the ``Name`` column values.
    """
    csv_path = os.path.join(path, 'curated-solubility-dataset.csv')

    if os.path.exists(csv_path):
        print('Dataset already downloaded in the local system...', flush = True, file = sys.stderr)
    else:
        # The target directory may not exist yet; create it before downloading.
        os.makedirs(path, exist_ok = True)
        # Download to the explicit file name so the file read below is exactly
        # the one written, whatever name the server suggests.
        wget.download('https://dataverse.harvard.edu/api/access/datafile/3407241?format=original&gbrecs=true', csv_path)

    df = pd.read_csv(csv_path)
    # keep = False discards every row whose SMILES occurs more than once,
    # not just the later copies.
    df = df.iloc[df['SMILES'].drop_duplicates(keep = False).index.values]

    y = df["Solubility"].values
    drugs = df["SMILES"].values
    drugs_idx = df["Name"].values

    return drugs, y, drugs_idx

def load_broad_repurposing_hub(path = './data'):
url = 'https://deeppurpose.s3.amazonaws.com/broad.csv'
if not os.path.exists(path):
Expand Down
21 changes: 21 additions & 0 deletions DeepPurpose/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import wget
from zipfile import ZipFile
import os
import sys

# ESPF encoding
vocab_path = './DeepPurpose/ESPF/drug_codes_chembl_freq_1500.txt'
Expand Down Expand Up @@ -871,6 +872,26 @@ def load_dict(path):
with open(path + '/config.pkl', 'rb') as f:
return pickle.load(f)

# Maps a dataset name to the URL of its zipped archive.
URLs = {
    'HIV': 'https://s3-us-west-1.amazonaws.com/deepchem.io/datasets/molnet_publish/hiv.zip'
}


def download_unzip(name, path, file_name):
    """Download and extract the archive for dataset ``name`` unless already present.

    Args:
        name: key into ``URLs`` selecting which archive to fetch.
        path: directory that receives the downloaded zip and its extracted
            contents; created, including missing parents, if absent.
        file_name: file expected under ``path`` once the data is available.
            If it already exists, nothing is downloaded or extracted.

    Raises:
        KeyError: if a download is needed and ``name`` is not in ``URLs``.
    """
    # makedirs with exist_ok handles nested paths and avoids the
    # check-then-create race of os.path.exists + os.mkdir.
    os.makedirs(path, exist_ok = True)

    if os.path.exists(os.path.join(path, file_name)):
        print('Dataset already downloaded in the local system...', flush = True, file = sys.stderr)
        return

    print('Download zip file...', flush = True, file = sys.stderr)
    url = URLs[name]
    saved_path = wget.download(url, path)

    print('Extract zip file...', flush = True, file = sys.stderr)
    # Named `archive` rather than `zip` so the builtin is not shadowed.
    with ZipFile(saved_path, 'r') as archive:
        archive.extractall(path = path)

def download_pretrained_model(model_name, save_dir = './save_folder'):
if model_name == 'DeepDTA_DAVIS':
print('Beginning Downloading DeepDTA_DAVIS Model...')
Expand Down
Loading

0 comments on commit 0dc3c37

Please sign in to comment.