Skip to content

Commit

Permalink
implement downsample_max_hash in terms of downsample_scaled
Browse files (browse the repository at this point in the history)
  • Loading branch information
ctb committed Oct 5, 2024
1 parent 34001e7 commit d9eeafd
Showing 1 changed file with 40 additions and 34 deletions.
74 changes: 40 additions & 34 deletions src/core/src/sketch/minhash.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -723,21 +723,7 @@ impl KmerMinHash {
// create a downsampled copy of self
pub fn downsample_max_hash(&self, max_hash: u64) -> Result<KmerMinHash, Error> {
let scaled = scaled_for_max_hash(max_hash);

let mut new_mh = KmerMinHash::new(
scaled,
self.ksize,
self.hash_function.clone(),
self.seed,
self.abunds.is_some(),
self.num,
);
if self.abunds.is_some() {
new_mh.add_many_with_abund(&self.to_vec_abunds())?;
} else {
new_mh.add_many(&self.mins)?;
}
Ok(new_mh)
self.downsample_scaled(scaled)
}

pub fn sum_abunds(&self) -> u64 {
Expand Down Expand Up @@ -783,8 +769,25 @@ impl KmerMinHash {

// create a downsampled copy of self
pub fn downsample_scaled(&self, scaled: u64) -> Result<KmerMinHash, Error> {
let max_hash = max_hash_for_scaled(scaled);
self.downsample_max_hash(max_hash)
// @CTB shouldn't we check that new scaled > old scaled?
if self.scaled() == scaled {
Ok(self.clone()) // avoid clone CTB
} else {
let mut new_mh = KmerMinHash::new(
scaled,
self.ksize,
self.hash_function.clone(),

Check warning on line 779 in src/core/src/sketch/minhash.rs

View check run for this annotation

Codecov / codecov/patch

src/core/src/sketch/minhash.rs#L779

Added line #L779 was not covered by tests
self.seed,
self.abunds.is_some(),
self.num,
);
if self.abunds.is_some() {
new_mh.add_many_with_abund(&self.to_vec_abunds())?;
} else {
new_mh.add_many(&self.mins)?;

Check warning on line 787 in src/core/src/sketch/minhash.rs

View check run for this annotation

Codecov / codecov/patch

src/core/src/sketch/minhash.rs#L787

Added line #L787 was not covered by tests
}
Ok(new_mh)
}
}

pub fn inflate(&mut self, abunds_from: &KmerMinHash) -> Result<(), Error> {
Expand Down Expand Up @@ -1531,27 +1534,30 @@ impl KmerMinHashBTree {
// create a downsampled copy of self
pub fn downsample_max_hash(&self, max_hash: u64) -> Result<KmerMinHashBTree, Error> {
let scaled = scaled_for_max_hash(max_hash);

let mut new_mh = KmerMinHashBTree::new(
scaled,
self.ksize,
self.hash_function.clone(),
self.seed,
self.abunds.is_some(),
self.num,
);
if self.abunds.is_some() {
new_mh.add_many_with_abund(&self.to_vec_abunds())?;
} else {
new_mh.add_many(&self.mins())?;
}
Ok(new_mh)
self.downsample_scaled(scaled)
}

// create a downsampled copy of self
pub fn downsample_scaled(&self, scaled: u64) -> Result<KmerMinHashBTree, Error> {
let max_hash = max_hash_for_scaled(scaled);
self.downsample_max_hash(max_hash)
// @CTB shouldn't we check that new scaled > old scaled?
if self.scaled() == scaled {
Ok(self.clone()) // CTB avoid clone...
} else {
let mut new_mh = KmerMinHashBTree::new(
scaled,
self.ksize,
self.hash_function.clone(),

Check warning on line 1549 in src/core/src/sketch/minhash.rs

View check run for this annotation

Codecov / codecov/patch

src/core/src/sketch/minhash.rs#L1549

Added line #L1549 was not covered by tests
self.seed,
self.abunds.is_some(),
self.num,
);
if self.abunds.is_some() {
new_mh.add_many_with_abund(&self.to_vec_abunds())?;
} else {
new_mh.add_many(&self.mins())?;

Check warning on line 1557 in src/core/src/sketch/minhash.rs

View check run for this annotation

Codecov / codecov/patch

src/core/src/sketch/minhash.rs#L1557

Added line #L1557 was not covered by tests
}
Ok(new_mh)
}
}

pub fn to_vec_abunds(&self) -> Vec<(u64, u64)> {
Expand Down

0 comments on commit d9eeafd

Please sign in to comment.