From ff4eeda317541270b824441b5f6e6f897dcfcd19 Mon Sep 17 00:00:00 2001 From: Luiz Irber Date: Sun, 13 Oct 2024 11:15:22 +0000 Subject: [PATCH] feat: Implement TryInto to convert Signature and SigStore into KmerMinHash (#3348) Ref: https://github.com/sourmash-bio/sourmash_plugin_branchwater/pull/467/files#r1797783380 Implement `TryInto` for Signature and SigStore to avoid having to clone a (potentially big) minhash sketch. --- src/core/src/collection.rs | 1 - src/core/src/errors.rs | 15 +++++++++++++++ src/core/src/signature.rs | 22 ++++++++++++++++++++++ src/core/src/storage/mod.rs | 10 ++++++++++ 4 files changed, 47 insertions(+), 1 deletion(-) diff --git a/src/core/src/collection.rs b/src/core/src/collection.rs index aa8e33e6e..9526eab23 100644 --- a/src/core/src/collection.rs +++ b/src/core/src/collection.rs @@ -241,7 +241,6 @@ mod test { use crate::prelude::Select; use crate::selection::Selection; use crate::signature::Signature; - use crate::Result; #[test] fn sigstore_selection_with_downsample() { diff --git a/src/core/src/errors.rs b/src/core/src/errors.rs index c1f3562e6..30d269a13 100644 --- a/src/core/src/errors.rs +++ b/src/core/src/errors.rs @@ -31,6 +31,15 @@ pub enum SourmashError { #[error("sketch needs abundance for this operation")] NeedsAbundanceTracking, + #[error("Expected a MinHash sketch in this signature")] + NoMinHashFound, + + #[error("Empty signature")] + EmptySignature, + + #[error("Multiple sketches found, expected one")] + MultipleSketchesFound, + #[error("Invalid hash function: {function:?}")] InvalidHashFunction { function: String }, @@ -108,6 +117,9 @@ pub enum SourmashErrorCode { MismatchNum = 1_07, NeedsAbundanceTracking = 1_08, CannotUpsampleScaled = 1_09, + NoMinHashFound = 1_10, + EmptySignature = 1_11, + MultipleSketchesFound = 1_12, // Input sequence errors InvalidDNA = 11_01, InvalidProt = 11_02, @@ -147,6 +159,9 @@ impl SourmashErrorCode { SourmashError::MismatchSeed => SourmashErrorCode::MismatchSeed, 
SourmashError::MismatchSignatureType => SourmashErrorCode::MismatchSignatureType, SourmashError::NonEmptyMinHash { .. } => SourmashErrorCode::NonEmptyMinHash, + SourmashError::NoMinHashFound => SourmashErrorCode::NoMinHashFound, + SourmashError::EmptySignature => SourmashErrorCode::EmptySignature, + SourmashError::MultipleSketchesFound => SourmashErrorCode::MultipleSketchesFound, SourmashError::InvalidDNA { .. } => SourmashErrorCode::InvalidDNA, SourmashError::InvalidProt { .. } => SourmashErrorCode::InvalidProt, SourmashError::InvalidCodonLength { .. } => SourmashErrorCode::InvalidCodonLength, diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index 8cc4ba915..520f8ad04 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -887,6 +887,28 @@ impl PartialEq for Signature { } } +impl TryInto<KmerMinHash> for Signature { + type Error = Error; + + fn try_into(self) -> Result<KmerMinHash, Self::Error> { + match self.signatures.len() { + 1 => self + .signatures + .into_iter() + .find_map(|sk| { + if let Sketch::MinHash(mh) = sk { + Some(mh) + } else { + None + } + }) + .ok_or_else(|| Error::NoMinHashFound), + 0 => Err(Error::EmptySignature), + 2.. => Err(Error::MultipleSketchesFound), + } + } +} + #[cfg(test)] mod test { use std::fs::File; diff --git a/src/core/src/storage/mod.rs b/src/core/src/storage/mod.rs index 12f456fc2..536ec5292 100644 --- a/src/core/src/storage/mod.rs +++ b/src/core/src/storage/mod.rs @@ -16,6 +16,7 @@ use typed_builder::TypedBuilder; use crate::errors::ReadDataError; use crate::prelude::*; use crate::signature::SigsTrait; +use crate::sketch::minhash::KmerMinHash; use crate::sketch::Sketch; use crate::{Error, Result}; @@ -550,6 +551,15 @@ impl From for SigStore { } } +impl TryInto<KmerMinHash> for SigStore { + type Error = crate::Error; + + fn try_into(self) -> std::result::Result<KmerMinHash, Self::Error> { + let sig: Signature = self.into(); + sig.try_into() + } +} + impl Comparable for SigStore { fn similarity(&self, other: &SigStore) -> f64 { let ng: &Signature = self.data().unwrap();