Skip to content

Commit

Permalink
fix by doing too much downsampling
Browse files: browse the repository at this point in the history
  • Loading branch information
ctb committed Oct 11, 2024
1 parent b118779 commit 464ec60
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 9 deletions.
2 changes: 1 addition & 1 deletion include/sourmash.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ enum SourmashErrorCode {
SOURMASH_ERROR_CODE_NON_EMPTY_MIN_HASH = 106,
SOURMASH_ERROR_CODE_MISMATCH_NUM = 107,
SOURMASH_ERROR_CODE_NEEDS_ABUNDANCE_TRACKING = 108,
SOURMASH_ERROR_CODE_CANNOT_UPSAMPLE = 109,
SOURMASH_ERROR_CODE_CANNOT_UPSAMPLE_SCALED = 109,
SOURMASH_ERROR_CODE_INVALID_DNA = 1101,
SOURMASH_ERROR_CODE_INVALID_PROT = 1102,
SOURMASH_ERROR_CODE_INVALID_CODON_LENGTH = 1103,
Expand Down
12 changes: 10 additions & 2 deletions src/core/src/index/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ pub mod search;

use std::path::Path;

use std::cmp::max;
use getset::{CopyGetters, Getters, Setters};
use log::trace;
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -220,10 +221,17 @@ pub fn calculate_gather_stats(
confidence: Option<f64>,
) -> Result<GatherResult> {
// get match_mh
let match_mh = match_sig.minhash().unwrap();
let match_mh = match_sig.minhash().expect("cannot retrieve sketch");
let match_mh = match_mh.clone();

eprintln!("XXX 2 {}, {}", match_mh.scaled(), query.scaled());

let max_scaled = max(match_mh.scaled(), query.scaled());
let query = query.downsample_scaled(max_scaled).expect("cannot downsample query");
let match_mh = match_mh.downsample_scaled(max_scaled).expect("cannot downsample match");

// calculate intersection
let isect = match_mh.intersection(&query)?;
let isect = match_mh.intersection(&query).expect("could not do intersection");
let isect_size = isect.0.len();
trace!("isect_size: {}", isect_size);
trace!("query.size: {}", query.size());
Expand Down
17 changes: 12 additions & 5 deletions src/core/src/index/revindex/disk_revindex.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::hash::{BuildHasher, BuildHasherDefault, Hash, Hasher};
use std::cmp::max;
use std::path::Path;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, RwLock};
Expand Down Expand Up @@ -398,24 +399,30 @@ impl RevIndexOps for RevIndex {

// get downsampled minhashes for comparison.
let match_mh = match_sig.minhash().unwrap().clone();
query = query.downsample_scaled(match_mh.scaled())?;
orig_query_ds = orig_query_ds.downsample_scaled(match_mh.scaled())?;

let max_scaled = max(query.scaled(), match_mh.scaled());

let match_mh = match_mh.downsample_scaled(max_scaled).expect("cannot downsample match");

eprintln!("XXX {}, {}, {}", query.scaled(), match_mh.scaled(), orig_query_ds.scaled());
query = query.downsample_scaled(max_scaled)?;
orig_query_ds = orig_query_ds.downsample_scaled(max_scaled)?;

// just calculate essentials here
let gather_result_rank = matches.len();

let query_mh = KmerMinHash::from(query.clone());

// grab the specific intersection:
let isect = match_mh.intersection(&query_mh)?;
let isect = match_mh.intersection(&query_mh).expect("failed to intersect");
let mut isect_mh = match_mh.clone();
isect_mh.clear();
isect_mh.add_many(&isect.0)?;

// Calculate stats
let gather_result = calculate_gather_stats(
&orig_query_ds,
KmerMinHash::from(query.clone()),
query_mh,
match_sig,
match_size,
gather_result_rank,
Expand All @@ -424,7 +431,7 @@ impl RevIndexOps for RevIndex {
calc_abund_stats,
calc_ani_ci,
ani_confidence_interval_fraction,
)?;
).expect("could not calculate gather stats");
// keep track of the sum weighted found
sum_weighted_found = gather_result.sum_weighted_found();
matches.push(gather_result);
Expand Down
2 changes: 1 addition & 1 deletion src/core/src/index/revindex/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -910,7 +910,7 @@ mod test {
0,
&query,
Some(selection.clone()),
)?;
).expect("failed to gather!");

{
let mut index = index;
Expand Down

0 comments on commit 464ec60

Please sign in to comment.