Skip to content

Commit

Permalink
data loading tutorial added
Browse files Browse the repository at this point in the history
  • Loading branch information
kexinhuang12345 committed Apr 14, 2020
1 parent beb9cd5 commit a14e339
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 4 deletions.
8 changes: 4 additions & 4 deletions dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@ def read_file_repurposing_library(path):
X_drug_names = []
for aline in file:
values = aline.split()
X_drug.append(values[0])
X_drug_names.append(float(values[1]))
X_drug.append(values[1])
X_drug_names.append(values[0])
file.close()
return np.array(X_drug), np.array(X_drug_names)

Expand All @@ -96,10 +96,10 @@ def read_file_target_sequence(path):
print('Path Not Found, please double check!')
values = file.readline().split()
file.close()
return values[0], values[1]
return values[1], values[0]


def download_BindingDB(path):
def download_BindingDB(path = './data'):

print('Beginning to download dataset...')

Expand Down
25 changes: 25 additions & 0 deletions toy_data/AID1706.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
SGFKKLVSPSSAVEKCIVSVSYRGNNLNGLWLGDSIYCPRHVLGKFSGDQWGDVLNLANNHEFEVVTQNGVTLNVVSRRLKGAVLILQTAVANAETPKYKFVKANCGDSFTIACSYGGTVIGLYPVTMRSNGTIRASFLAGACGSVGFNIEKGVVNFFYMHHLELPNALHTGTDLMGEFYGGYVDEEVAQRVPPDNLVTNNIVAWLYAAIISVKESSFSQPKWLESTTVSIEDYNRWASDNGFTPFSTSTAITKLSAITGVDVCKLLRTIMVKSAQWGSDPILGQYNFEDELTPESVFNQVGGVRLQ
CCOC1=CC=C(C=C1)N2C=CC(=O)C(=N2)C(=O)NC3=CC=C(C=C3)S(=O)(=O)NC4=NC=CC=N4 0
CCCCOC(=O)C1=CC=C(C=C1)NC(=O)/C=C/C2=CC=CS2 0
COC1=C(C=C(C=C1)C(=O)NC2CCSC3=CC=CC=C23)F 0
C1=CC=C(C=C1)C2=C(C(=O)NC(=N2)SCC(=O)NC3=CC=C(C=C3)OC(F)(F)Cl)C#N 1
CC(=O)NC1C2=C(C=CC(=C2)Cl)N(C1=O)CCN3CCOCC3.Cl 1
CC1=CC=C(C=C1)C(=O)NC(=C(Cl)Cl)S(=O)(=O)C2=CC=CC=C2 1
C1=CC=C(C=C1)CSC2=NN=C(S2)SCC3=CC=C(C=C3)C(=O)O 1
C1=CC=C2C(=C1)C(=CC(=C2N)C(C(F)(F)F)(C(F)(F)F)O)C(C(F)(F)F)(C(F)(F)F)O 1
COC1=CC=CC=C1CSC2=NC3=CC=CC=C3N2 0
CC(=O)NC1=CC=C(C=C1)N(C(C2=CC(=C(C=C2)O)O)C(=O)NC3CCCC3)C(=O)CN4C5=CC=CC=C5N=N4 1
CC1=CC=NC2=NC(=NN12)C(=O)OCCOC3=CC=CC=C3 0
CC1(C2CC=C(C1C2)/C=N/NC(=S)NC3=CN=CC=C3)C 1
CCOC(=O)C1=C(OC2=C(C13C4=C(C=CC5=C4N(C3=O)C(C=C5C)(C)C)C)C(=O)CCC2)N 0
C1CN(CCC12OCCO2)C3=NC=NC4=C3NC5=C4C=C(C=C5)Cl 0
CC(C)C1=CC=C(C=C1)NC(=O)CSC2=NC3=C(C(=O)N2CC4=CC=C(C=C4)C(=O)O)SC=C3 1
COC1=CC=CC=C1/C=C/C(=O)C2=C(C=CC(=C2)Br)OC(=O)C3=CC=CO3 1
CC(C)NC(=O)CN1CCN(CC1)C2=NC=CC=N2.C(=O)(C(=O)O)O 1
CC1=C(C(NC(=O)N1CCCC(=O)O)C2=CC=C(C=C2)Cl)C(=O)OCC3=CC=CC=C3 0
CC1=CC=CC=C1NC2=C/C(=N\S(=O)(=O)C3=CC=C(C=C3)C(=O)O)/C4=CC=CC=C4C2=O 1
CC(C)OC1=C(C=C(C(=C1)NC(=O)CSCC(=O)NC2=CC=CC(=C2)C(F)(F)F)Cl)Cl 1
CC1=CC=CC=C1NC2=NN=C(S2)SCC(=O)NC3=C(C=C(C=N3)C(F)(F)F)Cl 1
C1=CC=C(C=C1)C2=CSC3=C2C(=NC=N3)SCC(=O)NC4=CC=C(C=C4)C(=O)O 1
C1=CC2=NON=C2C(=C1)S(=O)(=O)NC3=C(C=CC(=C3)F)F 1
COC(=O)[C@@H](CC1=CNC2=CC=CC=C21)NC(=O)C3=CC(=O)NC4=CC=CC=C43 1
1 change: 1 addition & 0 deletions toy_data/RNA_polymerase_SARS_CoV2_target_seq.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
RNA_polymerase_SARS_CoV2 SADAQSFLNRVCGVSAARLTPCGTGTSTDVVYRAFDIYNDKVAGFAKFLKTNCCRFQEKDEDDNLIDSYFVVKRHTFSNYQHEETIYNLLKDCPAVAKHDFFKFRIDGDMVPHISRQRLTKYTMADLVYALRHFDEGNCDTLKEILVTYNCCDDDYFNKKDWYDFVENPDILRVYANLGERVRQALLKTVQFCDAMRNAGIVGVLTLDNQDLNGNWYDFGDFIQTTPGSGVPVVDSYYSLLMPILTLTRALTAESHVDTDLTKPYIKWDLLKYDFTEERLKLFDRYFKYWDQTYHPNCVNCLDDRCILHCANFNVLFSTVFPPTSFGPLVRKIFVDGVPFVVSTGYHFRELGVVHNQDVNLHSSRLSFKELLVYAADPAMHAASGNLLLDKRTTCFSVAALTNNVAFQTVKPGNFNKDFYDFAVSKGFFKEGSSVELKHFFFAQDGNAAISDYDYYRYNLPTMCDIRQLLFVVEVVDKYFDCYDGGCINANQVIVNNLDKSAGFPFNKWGKARLYYDSMSYEDQDALFAYTKRNVIPTITQMNLKYAISAKNRARTVAGVSICSTMTNRQFHQKLLKSIAATRGATVVIGTSKFYGGWHNMLKTVYSDVENPHLMGWDYPKCDRAMPNMLRIMASLVLARKHTTCCSLSHRFYRLANECAQVLSEMVMCGGSLYVKPGGTSSGDATTAYANSVFNICQAVTANVNALLSTDGNKIADKYVRNLQHRLYECLYRNRDVDTDFVNEFYAYLRKHFSMMILSDDAVVCFNSTYASQGLVASIKNFKSVLYYQNNVFMSEAKCWTETDLTKGPHEFCSQHTMLVKQGDDYVYLPYPDPSRILGAGCFVDDIVKTDGTLMIERFVSLAIDAYPLTKHPNQEYADVFHLYLQYIRKLHDELTGHMLDMYSVMLTNDNTSRYWEPEFYEAMYTPHTVLQ
2 changes: 2 additions & 0 deletions toy_data/dti.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
CC1=C2C=C(C=CC2=NN1)C3=CC(=CN=C3)OCC(CC4=CC=CC=C4)N MKKFFDSRREQGGSGLGSGSSGGGGSTSGLGSGYIGRVFGIGRQQVTVDEVLAEGGFAIVFLVRTSNGMKCALKRMFVNNEHDLQVCKREIQIMRDLSGHKNIVGYIDSSINNVSSGDVWEVLILMDFCRGGQVVNLMNQRLQTGFTENEVLQIFCDTCEAVARLHQCKTPIIHRDLKVENILLHDRGHYVLCDFGSATNKFQNPQTEGVNAVEDEIKKYTTLSYRAPEMVNLYSGKIITTKADIWALGCLLYKLCYFTLPFGESQVAICDGNFTIPDNSRYSQDMHCLIRYMLEPDPDKRPDIYQVSYFSFKLLKKECPIPNVQNSPIPAKLPEPVKASEAAAKKTQPKARLTDPIPTTETSIAPRQRPKAGQTQPNPGILPIQPALTPRKRATVQPPPQAAGSSNQPGLLASVPQPKPQAPPSQPLPQTQAKQPQAPPTPQQTPSTQAQGLPAQAQATPQHQQQLFLKQQQQQQQPPPAQQQPAGTFYQQQQAQTQQFQAVHPATQKPAIAQFPVVSQGGSQQQLMQNFYQQQQQQQQQQQQQQLATALHQQQLMTQQAALQQKPTMAAGQQPQPQPAAAPQPAPAQEPAIQAPVRQQPKVQTTPPPAVQGQKVGSLTPPSSPKTQRAGHRRILSDVTHSAVFGVPASKSTQLLQAAAAEASLNKSKSATTTPSGSPRTSQQNVYNPSEGSTWNPFDDDNFSKLTAEELLNKDFAKLGEGKHPEKLGGSAESLIPGFQSTQGDAFATTSFSAGTAEKRKGGQTVDSGLPLLSVSDPFIPLQVPDAPEKLIEGLKSPDTSLLLPDLLPMTDPFGSTSDAVIEKADVAVESLIPGLEPPVPQRLPSQTESVTSNRTDSLTGEDSLLDCSLLSNPTTDLLEEFAPTAISAPVHKAAEDSNLISGFDVPEGSDKVAEDEFDPIPVLITKNPQGGHSRNSSGSSESSLPNLARSLLLVDQLIDL 7.365
CC1=C2C=C(C=CC2=NN1)C3=CC(=CN=C3)OCC(CC4=CC=CC=C4)N SADAQSFLNRVCGVSAARLTPCGTGTSTDVVYRAFDIYNDKVAGFAKFLKTNCCRFQEKDEDDNLIDSYFVVKRHTFSNYQHEETIYNLLKDCPAVAKHDFFKFRIDGDMVPHISRQRLTKYTMADLVYALRHFDEGNCDTLKEILVTYNCCDDDYFNKKDWYDFVENPDILRVYANLGERVRQALLKTVQFCDAMRNAGIVGVLTLDNQDLNGNWYDFGDFIQTTPGSGVPVVDSYYSLLMPILTLTRALTAESHVDTDLTKPYIKWDLLKYDFTEERLKLFDRYFKYWDQTYHPNCVNCLDDRCILHCANFNVLFSTVFPPTSFGPLVRKIFVDGVPFVVSTGYHFRELGVVHNQDVNLHSSRLSFKELLVYAADPAMHAASGNLLLDKRTTCFSVAALTNNVAFQTVKPGNFNKDFYDFAVSKGFFKEGSSVELKHFFFAQDGNAAISDYDYYRYNLPTMCDIRQLLFVVEVVDKYFDCYDGGCINANQVIVNNLDKSAGFPFNKWGKARLYYDSMSYEDQDALFAYTKRNVIPTITQMNLKYAISAKNRARTVAGVSICSTMTNRQFHQKLLKSIAATRGATVVIGTSKFYGGWHNMLKTVYSDVENPHLMGWDYPKCDRAMPNMLRIMASLVLARKHTTCCSLSHRFYRLANECAQVLSEMVMCGGSLYVKPGGTSSGDATTAYANSVFNICQAVTANVNALLSTDGNKIADKYVRNLQHRLYECLYRNRDVDTDFVNEFYAYLRKHFSMMILSDDAVVCFNSTYASQGLVASIKNFKSVLYYQNNVFMSEAKCWTETDLTKGPHEFCSQHTMLVKQGDDYVYLPYPDPSRILGAGCFVDDIVKTDGTLMIERFVSLAIDAYPLTKHPNQEYADVFHLYLQYIRKLHDELTGHMLDMYSVMLTNDNTSRYWEPEFYEAMYTPHTVLQ 4.999
2 changes: 2 additions & 0 deletions toy_data/repurposing_data_examples.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Rufloxacin CN1CCN(CC1)c1c(F)cc2c3c1SCCn3cc(C(O)=O)c2=O
Sparfloxacin C[C@H]1CN(C[C@@H](C)N1)c1c(F)c(N)c2c(c1F)n(cc(C(O)=O)c2=O)C1CC1

0 comments on commit a14e339

Please sign in to comment.