Skip to content

Commit a10d4f4

Browse files
committed
merge colinear blocks
1 parent 1eea2fe commit a10d4f4

File tree

1 file changed

+80
-17
lines changed

1 file changed

+80
-17
lines changed

Diff for: misc/pafcluster.js

+80-17
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,62 @@ function* k8_readline(fn) {
8282
buf.destroy();
8383
}
8484

85+
/**
 * Merge all hits between one pair of sequences into a single summary hit.
 *
 * With a single hit, its summary fields are returned unchanged. Otherwise the
 * hits are ordered by `st1` and the highest-scoring chain of hits that advance
 * on both sequences is selected by dynamic programming. When a hit overlaps
 * its predecessor in the chain, its score is scaled down by the overlapped
 * fraction (the smaller of the two per-sequence scaled scores is used).
 *
 * @param {Array<Object>} b - non-empty list of hits; each hit provides
 *     name1, name2, len1, len2, min_cov, max_cov, s1, dv, st1, en1, st2, en2
 *     and blen. The array is not modified.
 * @returns {{name1, name2, len1, len2, min_cov:number, max_cov:number, s1:number, dv:number}}
 *     For a merged chain: s1 is the chain score, dv is the blen-weighted mean
 *     of the chained hits' dv, and min_cov/max_cov are the smaller/larger of
 *     the fractions of len1 and len2 covered by the chain. If no hit has a
 *     positive score the chain is empty: s1 and both coverages are 0 and dv
 *     is NaN.
 */
function merge_hits(b) {
	if (b.length === 1)
		return { name1:b[0].name1, name2:b[0].name2, len1:b[0].len1, len2:b[0].len2, min_cov:b[0].min_cov, max_cov:b[0].max_cov, s1:b[0].s1, dv:b[0].dv };

	// sort a copy so that the caller's array keeps its original order
	const hits = b.slice().sort(function(x, y) { return x.st1 - y.st1 });

	// f[i]: best score of a chain ending at hit i; bt[i]: predecessor of i in that chain (-1 if none)
	let f = [], bt = [];
	for (let i = 0; i < hits.length; ++i)
		f[i] = hits[i].s1, bt[i] = -1;
	for (let i = 0; i < hits.length; ++i) {
		for (let j = 0; j < i; ++j) {
			// hit j may precede hit i only if it starts earlier on sequence 2
			// and ends earlier on both sequences
			if (!(hits[j].st2 < hits[i].st2)) continue;
			if (hits[j].en1 >= hits[i].en1) continue;
			if (hits[j].en2 >= hits[i].en2) continue;
			// length of hit i already covered by hit j; always smaller than the
			// length of hit i because hit j ends before hit i does
			const ov1 = hits[j].en1 <= hits[i].st1? 0 : hits[j].en1 - hits[i].st1;
			const li1 = hits[i].en1 - hits[i].st1;
			const s11 = hits[i].s1 / li1 * (li1 - ov1);
			const ov2 = hits[j].en2 <= hits[i].st2? 0 : hits[j].en2 - hits[i].st2;
			const li2 = hits[i].en2 - hits[i].st2;
			const s12 = hits[i].s1 / li2 * (li2 - ov2);
			// be conservative: credit hit i with the smaller of the two scaled scores
			const s1 = s11 < s12? s11 : s12;
			if (f[i] < f[j] + s1)
				f[i] = f[j] + s1, bt[i] = j;
		}
	}

	// find the end of the best chain; only positive scores are considered
	let max_i = -1, max_f = 0, d = [];
	for (let i = 0; i < hits.length; ++i)
		if (max_f < f[i])
			max_f = f[i], max_i = i;
	// backtrack, then reverse to get the chain in increasing st1 order
	for (let k = max_i; k >= 0; k = bt[k])
		d.push(k);
	d = d.reverse();

	// accumulate the weighted divergence and the union of covered intervals on each sequence
	let dv = 0, tot = 0, cov1 = 0, cov2 = 0, st1 = 0, en1 = 0, st2 = 0, en2 = 0;
	for (let k = 0; k < d.length; ++k) {
		const h = hits[d[k]];
		tot += h.blen;
		dv += h.dv * h.blen;
		if (h.st1 > en1) { // gap on sequence 1: close the current interval and open a new one
			cov1 += en1 - st1;
			st1 = h.st1, en1 = h.en1;
		} else en1 = en1 > h.en1? en1 : h.en1;
		if (h.st2 > en2) { // same on sequence 2
			cov2 += en2 - st2;
			st2 = h.st2, en2 = h.en2;
		} else en2 = en2 > h.en2? en2 : h.en2;
	}
	dv /= tot;
	// add the last open interval and normalize by the sequence lengths
	cov1 = (cov1 + (en1 - st1)) / hits[0].len1;
	cov2 = (cov2 + (en2 - st2)) / hits[0].len2;
	const min_cov = cov1 < cov2? cov1 : cov2;
	const max_cov = cov1 > cov2? cov1 : cov2;
	return { name1:hits[0].name1, name2:hits[0].name2, len1:hits[0].len1, len2:hits[0].len2, min_cov:min_cov, max_cov:max_cov, s1:max_f, dv:dv };
}
138+
85139
function main(args) {
86-
let opt = { min_cov:.9, max_dv:.01 };
140+
let opt = { min_cov:.8, max_dv:.015 };
87141
for (const o of getopt(args, "c:d:", [])) {
88142
if (o.opt == '-c') opt.min_cov = parseFloat(o.arg);
89143
else if (o.opt == '-d') opt.max_dv = parseFloat(o.arg);
@@ -97,11 +151,13 @@ function main(args) {
97151
}
98152

99153
// read
100-
let a = [], len = {};
154+
let a = [], len = {}, name2len = {};
101155
for (const line of k8_readline(args[0])) {
102156
let m, t = line.split("\t");
103157
if (t[4] != "+") continue;
104-
const len1 = parseInt(t[1]), len2 = parseInt(t[6]);
158+
for (let i = 1; i < 4; ++i) t[i] = parseInt(t[i]);
159+
for (let i = 6; i < 11; ++i) t[i] = parseInt(t[i]);
160+
const len1 = t[1], len2 = t[6];
105161
let s1 = -1, dv = -1.0;
106162
for (let i = 12; i < t.length; ++i) {
107163
if ((m = /^(s1|dv):\S:(\S+)/.exec(t[i])) != null) {
@@ -114,26 +170,23 @@ function main(args) {
114170
const cov2 = (parseInt(t[8]) - parseInt(t[7])) / len2;
115171
const min_cov = cov1 < cov2? cov1 : cov2;
116172
const max_cov = cov1 > cov2? cov1 : cov2;
117-
a.push({ name1:t[0], name2:t[5], len1:len1, len2:len2, min_cov:min_cov, max_cov:max_cov, s1:s1, dv:dv });
173+
name2len[t[0]] = len1;
174+
name2len[t[5]] = len2;
175+
a.push({ name1:t[0], name2:t[5], len1:len1, len2:len2, min_cov:min_cov, max_cov:max_cov, s1:s1, dv:dv, st1:t[2], en1:t[3], st2:t[7], en2:t[8], blen:t[10] });
118176
len[t[0]] = len1, len[t[5]] = len2;
119177
}
120178
warn(`Read ${a.length} hits`);
121179

122-
// filter duplicated hits
123-
let pair = {}, n_flt = 0, b = [];
124-
a.sort(function(x, y) { return y.s1 - x.s1 });
180+
// merge duplicated hits
181+
let h = {};
125182
for (let i = 0; i < a.length; ++i) {
126183
const key = `${a[i].name1}\t${a[i].name2}`;
127-
if (pair[key] != null) {
128-
++n_flt;
129-
continue;
130-
}
131-
pair[key] = 1;
132-
b.push(a[i]);
184+
if (h[key] == null) h[key] = [];
185+
h[key].push(a[i]);
133186
}
134-
pair = null;
135-
warn(`Filtered ${n_flt} hits`);
136-
a = b;
187+
a = [];
188+
for (const key in h)
189+
a.push(merge_hits(h[key]));
137190

138191
// core loop
139192
while (a.length > 1) {
@@ -157,7 +210,10 @@ function main(args) {
157210
h[a[i].name1] = h[a[i].name2] = 1;
158211
}
159212
let n = 0;
160-
for (const key in h) ++n;
213+
for (const key in h) {
214+
++n;
215+
delete name2len[key];
216+
}
161217
print(`SD\t${max_name}\t${n}`);
162218
for (const key in h) print(`CL\t${key}\t${len[key]}`);
163219
print("//");
@@ -169,6 +225,13 @@ function main(args) {
169225
warn(`Reduced the number of hits from ${a.length} to ${b.length}`);
170226
a = b;
171227
}
228+
229+
// output remaining singletons
230+
for (const key in name2len) {
231+
print(`SD\t${key}\t1`);
232+
print(`CL\t${key}\t${name2len[key]}`);
233+
print(`//`);
234+
}
172235
}
173236

174237
main(arguments);

0 commit comments

Comments
 (0)