This repository was archived by the owner on Mar 21, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaln_reader.py
75 lines (63 loc) · 1.8 KB
/
aln_reader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import sys,os
from sequence import Sequence
"""
this is for reading some common alignment files
a list of sequences will be returned
NOTE: if you want to read an alignment in fasta format
just use the fasta reader in seq_reader.py
"""
"""
This will read a phylip alignment file and return the
list of seqs
The phylip file is in the format
numoftaxa numofsites
seqlabel sequence
seqlabel sequence
we assume that the phylip files are extended and not
strict (in terms of what type and how much white space
and how many characters for taxon names)
"""
def read_phylip_file(infilename):
    """Read a relaxed (extended) PHYLIP alignment and return its sequences.

    The first line of the file (number of taxa and number of sites) is
    read and discarded. Each following line longer than two characters is
    split on arbitrary whitespace: the first field is used as the taxon
    label and the second field as the sequence. Any additional fields on
    the line are ignored. Blank and whitespace-only lines are skipped.

    Args:
        infilename: path of the PHYLIP file to read.

    Returns:
        A list of Sequence objects, one per data line, in file order.
        The list is empty when the file has no data lines.

    Raises:
        IndexError: if a data line contains a label but no sequence field.
    """
    seqlist = []
    # `with` guarantees the handle is closed even when a line fails to parse.
    with open(infilename, "r") as infile:
        # The header (numoftaxa numofsites) is not needed to parse the
        # records, so it is simply skipped.
        infile.readline()
        for line in infile:
            # Very short lines (at most two characters, newline included)
            # are treated as empty.
            if len(line) <= 2:
                continue
            fields = line.split()
            if not fields:
                # Whitespace-only line: nothing to parse.
                continue
            seqlist.append(Sequence(name=fields[0], seq=fields[1]))
    return seqlist
"""
This will read a phylip alignment file with continuous characters and return the
list of seqs
The phylip file is in the format
numoftaxa numofsites
seqlabel<TAB>contvalue contvalue
seqlabel<TAB>contvalue contvalue
Unlike the sequence reader, the white space matters here:
the label must be separated from the values by a tab, and
the values must be separated from each other by single spaces
"""
def read_phylip_cont_file(infilename):
    """Read a PHYLIP-style file of continuous characters.

    The first line of the file (number of taxa and number of sites) is
    read and discarded. Each following line longer than two characters is
    stripped and split on TAB characters: the first field is the taxon
    label and the second field holds the values, which are split on single
    spaces. The resulting list of value strings is handed unchanged to
    ``Sequence.set_cont_values``; no numeric conversion happens here.
    Blank and whitespace-only lines are skipped.

    Args:
        infilename: path of the file to read.

    Returns:
        A list of Sequence objects, one per data line, in file order.
        The list is empty when the file has no data lines.

    Raises:
        IndexError: if a data line has no TAB-separated value field.
    """
    seqlist = []
    # `with` guarantees the handle is closed even when a line fails to parse.
    with open(infilename, "r") as infile:
        # The header (numoftaxa numofsites) is not needed to parse the
        # records, so it is simply skipped.
        infile.readline()
        for line in infile:
            # Very short lines (at most two characters, newline included)
            # are treated as empty.
            if len(line) <= 2:
                continue
            stripped = line.strip()
            if not stripped:
                # Whitespace-only line: nothing to parse.
                continue
            fields = stripped.split("\t")
            name = fields[0].strip()
            # Split on single spaces only, so consecutive spaces produce
            # empty strings in the value list.
            values = fields[1].strip().split(" ")
            tseq = Sequence(name=name)
            tseq.set_cont_values(values)
            seqlist.append(tseq)
    return seqlist