-
Notifications
You must be signed in to change notification settings - Fork 0
/
jaspar.py
74 lines (56 loc) · 2.24 KB
/
jaspar.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/bin/python
from coreapi import Client
from Bio import SeqIO
import sys
import os
from collections import OrderedDict
from requests.exceptions import ConnectionError
# Address of the JASPAR API document that the coreapi client loads before
# calling the "infer" action.
JASPAR_DOCS_URL = 'http://jaspar.genereg.net/api/v1/docs/'

# Column titles of the final table, in output order.  The leading space in
# " DBD" is deliberate: it keeps the header byte-identical to tables written
# by earlier runs of this script.
TABLE_COLUMNS = ("GeneID", "Profile_name", "TF_name", "Evalue", " DBD",
                 "Matrix_url", "Matrix_logo_url")


def final_table_path(filename):
    """Return the path of the result table derived from *filename*.

    The extension of *filename* is dropped and ``finaltable.txt`` is appended,
    e.g. ``genes.fasta`` -> ``genesfinaltable.txt``.
    """
    return os.path.splitext(filename)[0] + 'finaltable' + '.txt'


def format_hit_rows(gene_id, result):
    """Render one inference response as tab-separated table rows.

    Args:
        gene_id: Identifier of the query sequence; first cell of every row.
        result: Mapping returned by the API call.  Its ``'results'`` entry is
            expected to be a list of mappings, one per matching profile
            (NOTE(review): shape taken from how the script consumes it;
            confirm against the JASPAR API).

    Returns:
        One line per profile, or an empty string when ``'results'`` is
        missing, ``None`` or empty.
    """
    rows = []
    for hit in (result.get('results') or []):
        cells = [gene_id] + [str(value) for value in hit.values()]
        # Every cell, including the last one, is followed by a tab so rows
        # keep the layout earlier versions produced.  The row always ends
        # with a newline, independent of which keys the profile carries.
        rows.append('\t'.join(cells) + '\t\n')
    return ''.join(rows)


def write_final_table(path, hits):
    """Append the header line and the rows of every hit to *path*.

    Args:
        path: Destination file; opened in append mode, so an existing table
            is extended rather than overwritten.
        hits: Iterable of ``(gene_id, result)`` pairs as accepted by
            ``format_hit_rows``.
    """
    with open(path, 'a') as table:
        table.write('\t'.join(TABLE_COLUMNS) + '\n')
        for gene_id, result in hits:
            table.write(format_hit_rows(gene_id, result))


def main(argv=None):
    """Run a JASPAR profile inference for every sequence of a FASTA file.

    Args:
        argv: Command-line vector (defaults to ``sys.argv``).  ``argv[1]`` is
            the FASTA file to read; the output name is derived from the LAST
            argument, which is the same file when only one is given.

    Raises:
        ValueError: ``'No sequence file'`` when no input file is given, or
            ``'check your sequence format'`` when reading or submitting a
            record raises ``ValueError``.
        SystemExit: With status 1 when the JASPAR service cannot be reached.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        raise ValueError('No sequence file')
    table_path = final_table_path(argv[-1])

    with open(argv[1], 'r') as fasta_handle:
        try:
            # Connecting to JASPAR
            client = Client()
            document = client.get(JASPAR_DOCS_URL)
        except ConnectionError as e:
            print(e)
            print("Database is not responding. Try again later. ")
            # Nothing can be queried without a client, so stop here instead
            # of failing later on an undefined name.
            raise SystemExit(1)

        print("Profile Inference Search Started.....")
        # Responses stay in memory.  Writing them to a scratch file and
        # reading them back with eval() would execute whatever the remote
        # service returned, and a scratch file named "<base>.txt" can collide
        # with (and end up deleting) an input file of the same name.
        hits = []
        try:
            for record in SeqIO.parse(fasta_handle, "fasta"):
                params = {"sequence": '%s' % record.seq}
                result = client.action(document, ["infer", "read"],
                                       params=params)
                hits.append((record.id, result))
        except ValueError:
            raise ValueError('check your sequence format')
        finally:
            # Runs on failure too, so the answers already received are
            # still written out.
            print("Profile Inference Search Finished")
            print("Generating File.....")
            write_final_table(table_path, hits)


if __name__ == "__main__":
    main()