-
Notifications
You must be signed in to change notification settings - Fork 24
/
MES_q2b_check_file_type.py
31 lines (26 loc) · 1.07 KB
/
MES_q2b_check_file_type.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import re
# This function takes a single string and returns whether it is DNA, Amino Acid or Unknown
def molecule_type(molecule):
    """Classify a sequence string as "DNA", "Amino Acid" or "Unknown".

    Args:
        molecule: The sequence to classify. Matching is case-sensitive
            (upper-case letters only). A single trailing newline is
            tolerated because ``$`` also matches just before a final
            newline, so lines read straight from a file can be passed in.

    Returns:
        "DNA" if the sequence is non-empty and consists only of the
        characters A, C, T and G; "Amino Acid" if it is non-empty and
        consists only of the characters ACDEFGHIKLMNOPQRSTVWY; "Unknown"
        otherwise, including for an empty string or a blank line.
    """
    # "+" rather than "*": an empty sequence (or a blank input line) contains
    # no residues at all and must not be reported as DNA.
    # DNA is tested first because its alphabet is a subset of the amino acid
    # alphabet; the order of the two checks matters.
    if re.search(r"^[ACTG]+$", molecule):
        return "DNA"
    if re.search(r"^[ACDEFGHIKLMNOPQRSTVWY]+$", molecule):
        return "Amino Acid"
    # Everything else is Unknown
    return "Unknown"
# Self-check run at import time: each sample sequence is paired with the
# classification molecule_type is expected to return for it.
_expected_classifications = (
    ("AAAA", "DNA"),
    ("ACTGGGA", "DNA"),
    ("ACTGGGN", "Amino Acid"),
    ("NNNNNNN", "Amino Acid"),
    ("GHMKL", "Amino Acid"),
    ("GHJKL", "Unknown"),
    ("RB", "Unknown"),
    ("T", "DNA"),
    ("TF", "Amino Acid"),
)
# Cases are checked in the listed order; the first mismatch raises AssertionError.
for _sample, _expected in _expected_classifications:
    assert molecule_type(_sample) == _expected
# Iterate over the input file and determine the molecule type for each sequence (one per line)
# The with-block guarantees the file handle is closed, even if reading,
# classifying or printing raises part-way through.
with open("inputfile.txt") as fh:
    for line in fh:
        # Each line still carries its trailing newline; molecule_type's
        # patterns end in "$", which matches just before a final newline.
        print(molecule_type(line))