@@ -82,8 +82,62 @@ function* k8_readline(fn) {
82
82
buf . destroy ( ) ;
83
83
}
84
84
85
/**
 * Collapse all hits between one pair of sequences into a single summary record.
 *
 * A single hit is passed through (only the summary fields are copied). For
 * several hits, the best-scoring co-linear chain is selected by dynamic
 * programming and the chain's score, length-weighted divergence and coverage on
 * both sequences are reported.
 *
 * @param {Array<Object>} b  non-empty list of hits for the same (name1, name2)
 *     pair. Each hit provides name1, name2, len1, len2, min_cov, max_cov, s1,
 *     dv, st1, en1, st2, en2 and blen. The array is sorted in place by st1.
 * @returns {Object} {name1, name2, len1, len2, min_cov, max_cov, s1, dv}
 */
function merge_hits(b) {
	if (b.length == 1)
		return { name1: b[0].name1, name2: b[0].name2, len1: b[0].len1, len2: b[0].len2,
				 min_cov: b[0].min_cov, max_cov: b[0].max_cov, s1: b[0].s1, dv: b[0].dv };

	// Chaining requires hits ordered by their start on the first sequence.
	b.sort(function (x, y) { return x.st1 - y.st1 });

	// f[i]: best chain score ending at hit i; bt[i]: predecessor of i in that chain (-1 if none).
	let f = [], bt = [];
	for (let i = 0; i < b.length; ++i)
		f[i] = b[i].s1, bt[i] = -1;
	for (let i = 0; i < b.length; ++i) {
		for (let j = 0; j < i; ++j) {
			// j may precede i only if it also starts and ends earlier on both sequences.
			if (b[j].st2 >= b[i].st2) continue;
			if (b[j].en1 >= b[i].en1) continue;
			if (b[j].en2 >= b[i].en2) continue;
			// Overlap between j's end and i's start. It must be non-negative so that
			// the score of i is scaled DOWN to its non-overlapping fraction.
			// (The previous code computed st - en, a negative number, which
			// inflated the score of overlapping hits.)
			const ov1 = Math.max(0, b[j].en1 - b[i].st1);
			const li1 = b[i].en1 - b[i].st1;
			const s11 = b[i].s1 / li1 * (li1 - ov1);
			const ov2 = Math.max(0, b[j].en2 - b[i].st2);
			const li2 = b[i].en2 - b[i].st2;
			const s12 = b[i].s1 / li2 * (li2 - ov2);
			// Be conservative: credit the smaller of the two scaled scores.
			const s1 = s11 < s12 ? s11 : s12;
			if (f[i] < f[j] + s1)
				f[i] = f[j] + s1, bt[i] = j;
		}
	}

	// Pick the end of the best chain and backtrack to recover it in order.
	// NOTE(review): if no hit has a positive s1, max_i stays -1, the chain is
	// empty and dv below becomes NaN (0/0) -- confirm whether callers can pass such input.
	let max_i = -1, max_f = 0, d = [];
	for (let i = 0; i < b.length; ++i)
		if (max_f < f[i])
			max_f = f[i], max_i = i;
	for (let k = max_i; k >= 0; k = bt[k])
		d.push(k);
	d = d.reverse();

	// Accumulate length-weighted divergence and the union of covered intervals
	// on each sequence ([st, en] is the interval currently being extended).
	let dv = 0, tot = 0, cov1 = 0, cov2 = 0, st1 = 0, en1 = 0, st2 = 0, en2 = 0;
	for (let k = 0; k < d.length; ++k) {
		const i = d[k];
		tot += b[i].blen;
		dv += b[i].dv * b[i].blen;
		if (b[i].st1 > en1) { // gap: close the current interval and open a new one
			cov1 += en1 - st1;
			st1 = b[i].st1, en1 = b[i].en1;
		} else en1 = en1 > b[i].en1 ? en1 : b[i].en1;
		if (b[i].st2 > en2) {
			cov2 += en2 - st2;
			st2 = b[i].st2, en2 = b[i].en2;
		} else en2 = en2 > b[i].en2 ? en2 : b[i].en2;
	}
	dv /= tot;
	// Add the last open interval and normalize by the sequence lengths.
	cov1 = (cov1 + (en1 - st1)) / b[0].len1;
	cov2 = (cov2 + (en2 - st2)) / b[0].len2;
	const min_cov = cov1 < cov2 ? cov1 : cov2;
	const max_cov = cov1 > cov2 ? cov1 : cov2;
	//warn(d.length, b[0].name1, b[0].name2, min_cov, max_cov);
	return { name1: b[0].name1, name2: b[0].name2, len1: b[0].len1, len2: b[0].len2,
			 min_cov: min_cov, max_cov: max_cov, s1: max_f, dv: dv };
}
138
+
85
139
function main ( args ) {
86
- let opt = { min_cov :.9 , max_dv :.01 } ;
140
+ let opt = { min_cov :.8 , max_dv :.015 } ;
87
141
for ( const o of getopt ( args , "c:d:" , [ ] ) ) {
88
142
if ( o . opt == '-c' ) opt . min_cov = parseFloat ( o . arg ) ;
89
143
else if ( o . opt == '-d' ) opt . max_dv = parseFloat ( o . arg ) ;
@@ -97,11 +151,13 @@ function main(args) {
97
151
}
98
152
99
153
// read
100
- let a = [ ] , len = { } ;
154
+ let a = [ ] , len = { } , name2len = { } ;
101
155
for ( const line of k8_readline ( args [ 0 ] ) ) {
102
156
let m , t = line . split ( "\t" ) ;
103
157
if ( t [ 4 ] != "+" ) continue ;
104
- const len1 = parseInt ( t [ 1 ] ) , len2 = parseInt ( t [ 6 ] ) ;
158
+ for ( let i = 1 ; i < 4 ; ++ i ) t [ i ] = parseInt ( t [ i ] ) ;
159
+ for ( let i = 6 ; i < 11 ; ++ i ) t [ i ] = parseInt ( t [ i ] ) ;
160
+ const len1 = t [ 1 ] , len2 = t [ 6 ] ;
105
161
let s1 = - 1 , dv = - 1.0 ;
106
162
for ( let i = 12 ; i < t . length ; ++ i ) {
107
163
if ( ( m = / ^ ( s 1 | d v ) : \S : ( \S + ) / . exec ( t [ i ] ) ) != null ) {
@@ -114,26 +170,23 @@ function main(args) {
114
170
const cov2 = ( parseInt ( t [ 8 ] ) - parseInt ( t [ 7 ] ) ) / len2 ;
115
171
const min_cov = cov1 < cov2 ? cov1 : cov2 ;
116
172
const max_cov = cov1 > cov2 ? cov1 : cov2 ;
117
- a . push ( { name1 :t [ 0 ] , name2 :t [ 5 ] , len1 :len1 , len2 :len2 , min_cov :min_cov , max_cov :max_cov , s1 :s1 , dv :dv } ) ;
173
+ name2len [ t [ 0 ] ] = len1 ;
174
+ name2len [ t [ 5 ] ] = len2 ;
175
+ a . push ( { name1 :t [ 0 ] , name2 :t [ 5 ] , len1 :len1 , len2 :len2 , min_cov :min_cov , max_cov :max_cov , s1 :s1 , dv :dv , st1 :t [ 2 ] , en1 :t [ 3 ] , st2 :t [ 7 ] , en2 :t [ 8 ] , blen :t [ 10 ] } ) ;
118
176
len [ t [ 0 ] ] = len1 , len [ t [ 5 ] ] = len2 ;
119
177
}
120
178
warn ( `Read ${ a . length } hits` ) ;
121
179
122
- // filter duplicated hits
123
- let pair = { } , n_flt = 0 , b = [ ] ;
124
- a . sort ( function ( x , y ) { return y . s1 - x . s1 } ) ;
180
+ // merge duplicated hits
181
+ let h = { } ;
125
182
for ( let i = 0 ; i < a . length ; ++ i ) {
126
183
const key = `${ a [ i ] . name1 } \t${ a [ i ] . name2 } ` ;
127
- if ( pair [ key ] != null ) {
128
- ++ n_flt ;
129
- continue ;
130
- }
131
- pair [ key ] = 1 ;
132
- b . push ( a [ i ] ) ;
184
+ if ( h [ key ] == null ) h [ key ] = [ ] ;
185
+ h [ key ] . push ( a [ i ] ) ;
133
186
}
134
- pair = null ;
135
- warn ( `Filtered ${ n_flt } hits` ) ;
136
- a = b ;
187
+ a = [ ] ;
188
+ for ( const key in h )
189
+ a . push ( merge_hits ( h [ key ] ) ) ;
137
190
138
191
// core loop
139
192
while ( a . length > 1 ) {
@@ -157,7 +210,10 @@ function main(args) {
157
210
h [ a [ i ] . name1 ] = h [ a [ i ] . name2 ] = 1 ;
158
211
}
159
212
let n = 0 ;
160
- for ( const key in h ) ++ n ;
213
+ for ( const key in h ) {
214
+ ++ n ;
215
+ delete name2len [ key ] ;
216
+ }
161
217
print ( `SD\t${ max_name } \t${ n } ` ) ;
162
218
for ( const key in h ) print ( `CL\t${ key } \t${ len [ key ] } ` ) ;
163
219
print ( "//" ) ;
@@ -169,6 +225,13 @@ function main(args) {
169
225
warn ( `Reduced the number of hits from ${ a . length } to ${ b . length } ` ) ;
170
226
a = b ;
171
227
}
228
+
229
+ // output remaining singletons
230
+ for ( const key in name2len ) {
231
+ print ( `SD\t${ key } \t1` ) ;
232
+ print ( `CL\t${ key } \t${ name2len [ key ] } ` ) ;
233
+ print ( `//` ) ;
234
+ }
172
235
}
173
236
174
237
main ( arguments ) ;
0 commit comments