-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsimtgen.py
More file actions
executable file
·80 lines (51 loc) · 1.78 KB
/
Copy pathsimtgen.py
File metadata and controls
executable file
·80 lines (51 loc) · 1.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/python3
import random
import sys
from operator import itemgetter
def parse_ali_pt(raw_pt):
    """Convert one ``FROM-TO`` alignment token into a pair of integers.

    Args:
        raw_pt: Text made of two integer fields joined by a single ``-``,
            e.g. ``"3-7"``.

    Returns:
        A ``(from_index, to_index)`` tuple of ints.

    Raises:
        ValueError: If the token does not split into exactly two fields on
            ``-`` or if either field is not a valid integer literal.
    """
    fields = raw_pt.split('-')
    # Unpacking enforces "exactly two fields" before any int conversion.
    left_field, right_field = fields
    return (int(left_field), int(right_field))
def parse_ali(ali_raw_line):
    """Parse one line of an alignment file into a list of alignment points.

    The line is expected to hold ``FROM-TO`` tokens separated by whitespace.

    Args:
        ali_raw_line: One raw text line, optionally with leading/trailing
            whitespace or a trailing newline.

    Returns:
        A list with one ``parse_ali_pt`` result per token, in line order.
        A blank or whitespace-only line gives an empty list.

    Raises:
        ValueError: Propagated from ``parse_ali_pt`` for a malformed token.
    """
    # split() without an argument collapses runs of any whitespace (spaces,
    # tabs, the trailing newline) and returns [] for a blank line, so no
    # empty token can reach parse_ali_pt the way it could with split(' ').
    return [parse_ali_pt(raw_pt) for raw_pt in ali_raw_line.split()]
def measure_monotonicity(ali):
    """Report how far each out-of-order alignment point must move left.

    The points are ordered by their first component (stable sort, so ties
    keep their input order) and only the second components are kept. That
    sequence is then insertion-sorted: every element that is smaller than
    its predecessor is moved left until it is no longer smaller than the
    element before it, and the number of positions it moved is recorded.

    Args:
        ali: Iterable of indexable points; element 0 is the sort key and
            element 1 is the value checked for monotonicity.

    Returns:
        A list with one positive move count per displaced element, in scan
        order. The list is empty when the second components are already
        non-decreasing (including for empty input).
    """
    targets = list(map(itemgetter(1), sorted(ali, key=itemgetter(0))))
    penalties = []
    for pos in range(1, len(targets)):
        current = targets[pos]
        if not current < targets[pos - 1]:
            # Already in order relative to its predecessor: no penalty.
            continue
        moves = 0
        slot = pos
        # Shift larger predecessors one step right until `current` fits.
        while slot > 0 and current < targets[slot - 1]:
            targets[slot] = targets[slot - 1]
            slot -= 1
            moves += 1
        targets[slot] = current
        penalties.append(moves)
    return penalties
def do_sampling(ali_pts, nr=5, mul_factor=2):
    """Pick a random subset of distinct alignment points.

    The number of points drawn is ``min(int(len(ali_pts) / mul_factor), nr)``,
    limited to the range ``0 .. len(ali_pts)``.

    Args:
        ali_pts: Sequence of alignment points to draw from.
        nr: Upper bound (int) on the number of points returned.
        mul_factor: Divisor applied to the number of points; with the
            default of 2 at most half of the points are drawn. Sequences
            shorter than ``mul_factor`` yield no sample at all.

    Returns:
        A list of points taken from distinct positions of ``ali_pts``,
        listed in the same relative order as in ``ali_pts``. Empty when
        there are too few points.

    Raises:
        ZeroDivisionError: If ``mul_factor`` is 0.
    """
    population = len(ali_pts)
    if population < mul_factor:
        return []
    wanted = min(int(population / mul_factor), nr)
    # Never ask for a negative count or for more points than exist; the
    # latter can happen when mul_factor < 1 and must not hang or crash.
    wanted = max(0, min(wanted, population))
    # random.sample draws distinct positions directly, without retry loops.
    chosen = random.sample(range(population), wanted)
    # Sorting the positions makes the output order independent of how the
    # random draw happened to be ordered.
    return [ali_pts[i] for i in sorted(chosen)]
def doit(alifile):
    """Print sampled alignment points for every monotone line of a file.

    Each line of ``alifile`` is parsed with ``parse_ali``. Lines for which
    ``measure_monotonicity`` reports no displaced point are sampled with
    ``do_sampling``, and every sampled point is printed to stdout as
    ``<line index> <from> <to>`` (line indices start at 0).

    Args:
        alifile: Path of the alignment file to read.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: Propagated from parsing a malformed line.
    """
    # The context manager closes the file even if a line fails to parse.
    with open(alifile, 'r') as ali_fh:
        for i, ali_raw_line in enumerate(ali_fh):
            ali_pts = parse_ali(ali_raw_line)
            if measure_monotonicity(ali_pts):
                # At least one point is out of order: skip this line.
                continue
            for sample in do_sampling(ali_pts):
                print(i, *sample)
if __name__ == "__main__":
    # The first command-line argument is the alignment file to process;
    # fail with a usage message instead of an IndexError traceback.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} ALIGNMENT_FILE")
    doit(sys.argv[1])