Skip to content

Commit 7edeec6

Browse files
committedMay 1, 2021
r1026: fixed bugs in seed sampling add hifi
1 parent feb92d3 commit 7edeec6

File tree

3 files changed

+27
-14
lines changed

3 files changed

+27
-14
lines changed
 

‎main.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
#include "mmpriv.h"
88
#include "ketopt.h"
99

10-
#define MM_VERSION "2.18-r1025-dirty"
10+
#define MM_VERSION "2.18-r1026-dirty"
1111

1212
#ifdef __linux__
1313
#include <sys/resource.h>

‎options.c

+7-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ void mm_mapopt_init(mm_mapopt_t *opt)
2626
opt->max_chain_skip = 25;
2727
opt->max_chain_iter = 5000;
2828
opt->chain_gap_scale = 1.0f;
29-
opt->max_max_occ = 5000;
29+
opt->max_max_occ = 4095;
3030
opt->occ_dist = 0;
3131

3232
opt->mask_level = 0.5f;
@@ -91,6 +91,12 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
9191
io->flag |= MM_I_HPC, io->k = 19;
9292
} else if (strcmp(preset, "map-ont") == 0) {
9393
io->flag = 0, io->k = 15;
94+
} else if (strcmp(preset, "hifi") == 0 || strcmp(preset, "ccs") == 0) {
95+
io->flag = 0, io->k = 19, io->w = 19;
96+
mo->a = 1, mo->b = 4, mo->q = 6, mo->q2 = 26, mo->e = 2, mo->e2 = 1;
97+
mo->occ_dist = 100;
98+
mo->min_mid_occ = 100;
99+
mo->min_dp_max = 200;
94100
} else if (strcmp(preset, "asm5") == 0) {
95101
io->flag = 0, io->k = 19, io->w = 19;
96102
mo->a = 1, mo->b = 19, mo->q = 39, mo->q2 = 81, mo->e = 3, mo->e2 = 1, mo->zdrop = mo->zdrop_inv = 200;

‎seed.c

+19-12
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,28 @@
11
#include "mmpriv.h"
22
#include "kalloc.h"
3+
#include "ksort.h"
34

4-
mm_seed_t *mm_seed_collect_all(void *km, const mm_idx_t *mi, const mm128_v *mv)
5+
mm_seed_t *mm_seed_collect_all(void *km, const mm_idx_t *mi, const mm128_v *mv, int32_t *n_m_)
56
{
67
mm_seed_t *m;
78
size_t i;
9+
int32_t k;
810
m = (mm_seed_t*)kmalloc(km, mv->n * sizeof(mm_seed_t));
9-
for (i = 0; i < mv->n; ++i) {
11+
for (i = k = 0; i < mv->n; ++i) {
1012
const uint64_t *cr;
11-
mm_seed_t *q = &m[i];
13+
mm_seed_t *q;
1214
mm128_t *p = &mv->a[i];
1315
uint32_t q_pos = (uint32_t)p->y, q_span = p->x & 0xff;
1416
int t;
1517
cr = mm_idx_get(mi, p->x>>8, &t);
18+
if (t == 0) continue;
19+
q = &m[k++];
1620
q->q_pos = q_pos, q->q_span = q_span, q->cr = cr, q->n = t, q->seg_id = p->y >> 32;
1721
q->is_tandem = q->flt = 0;
1822
if (i > 0 && p->x>>8 == mv->a[i - 1].x>>8) q->is_tandem = 1;
1923
if (i < mv->n - 1 && p->x>>8 == mv->a[i + 1].x>>8) q->is_tandem = 1;
2024
}
25+
*n_m_ = k;
2126
return m;
2227
}
2328

@@ -37,18 +42,19 @@ void mm_seed_select(int32_t n, mm_seed_t *a, int len, int max_occ, int max_max_o
3742
for (i = 0, last0 = -1; i <= n; ++i) {
3843
if (i == n || a[i].n <= max_occ) {
3944
if (i - last0 > 1) {
40-
int32_t ps = last0 < 0? 0 : (uint32_t)a[last0].q_pos;
41-
int32_t pe = i == n? len : (uint32_t)a[i].q_pos;
45+
int32_t ps = last0 < 0? 0 : (uint32_t)a[last0].q_pos>>1;
46+
int32_t pe = i == n? len : (uint32_t)a[i].q_pos>>1;
4247
int32_t j, k, st = last0 + 1, en = i;
4348
int32_t max_high_occ = (int32_t)((double)(pe - ps) / dist + .499);
49+
//fprintf(stderr, "Y\t%d\t%d\n", ps, pe);
4450
if (max_high_occ > MAX_MAX_HIGH_OCC)
4551
max_high_occ = MAX_MAX_HIGH_OCC;
4652
for (j = st, k = 0; j < en && k < max_high_occ; ++j, ++k)
4753
b[k] = (uint64_t)a[j].n<<32 | j;
4854
ks_heapmake_uint64_t(k, b); // initialize the binomial heap
4955
for (; j < en; ++j) { // if there are more, choose top max_high_occ
50-
if (a[j].n < (uint32_t)b[0]) { // then update the heap
51-
b[0] = (b[0]&0xFFFFFFFF00000000ULL) | j;
56+
if (a[j].n < (int32_t)(b[0]>>32)) { // then update the heap
57+
b[0] = (uint64_t)a[j].n<<32 | j;
5258
ks_heapdown_uint64_t(0, k, b);
5359
}
5460
}
@@ -65,21 +71,22 @@ void mm_seed_select(int32_t n, mm_seed_t *a, int len, int max_occ, int max_max_o
6571

6672
mm_seed_t *mm_collect_matches(void *km, int *_n_m, int qlen, int max_occ, int max_max_occ, int dist, const mm_idx_t *mi, const mm128_v *mv, int64_t *n_a, int *rep_len, int *n_mini_pos, uint64_t **mini_pos)
6773
{
68-
int rep_st = 0, rep_en = 0, n_m;
74+
int rep_st = 0, rep_en = 0, n_m, n_m0;
6975
size_t i;
7076
mm_seed_t *m;
7177
*n_mini_pos = 0;
7278
*mini_pos = (uint64_t*)kmalloc(km, mv->n * sizeof(uint64_t));
73-
m = mm_seed_collect_all(km, mi, mv);
79+
m = mm_seed_collect_all(km, mi, mv, &n_m0);
7480
if (dist > 0 && max_max_occ > max_occ) {
75-
mm_seed_select(mv->n, m, qlen, max_occ, max_max_occ, dist);
81+
mm_seed_select(n_m0, m, qlen, max_occ, max_max_occ, dist);
7682
} else {
77-
for (i = 0; i < mv->n; ++i)
83+
for (i = 0; i < n_m0; ++i)
7884
if (m[i].n > max_occ)
7985
m[i].flt = 1;
8086
}
81-
for (i = 0, n_m = 0, *rep_len = 0, *n_a = 0; i < mv->n; ++i) {
87+
for (i = 0, n_m = 0, *rep_len = 0, *n_a = 0; i < n_m0; ++i) {
8288
mm_seed_t *q = &m[i];
89+
//fprintf(stderr, "X\t%d\t%d\t%d\n", q->q_pos>>1, q->n, q->flt);
8390
if (q->flt) {
8491
int en = (q->q_pos >> 1) + 1, st = en - q->q_span;
8592
if (st > rep_en) {

0 commit comments

Comments
 (0)
Please sign in to comment.