diff --git a/sirius_cli/src/main/java/de/unijena/bioinf/projectspace/summaries/CanopusSummaryWriter.java b/sirius_cli/src/main/java/de/unijena/bioinf/projectspace/summaries/CanopusSummaryWriter.java index de3838bb7b..5776651abc 100644 --- a/sirius_cli/src/main/java/de/unijena/bioinf/projectspace/summaries/CanopusSummaryWriter.java +++ b/sirius_cli/src/main/java/de/unijena/bioinf/projectspace/summaries/CanopusSummaryWriter.java @@ -85,7 +85,6 @@ private void chooseBestNPCAssignments(ProbabilityFingerprint[] npcClassification FingerprintVersion v = npcClassifications[0].getFingerprintVersion(); if (v instanceof MaskedFingerprintVersion) v = ((MaskedFingerprintVersion) v).getMaskedFingerprintVersion(); NPCF = (NPCFingerprintVersion) v; - //todo do we have to perform index mapping? for (int i = 0; i < npcClassifications.length; ++i) { ProbabilityFingerprint fp = npcClassifications[i]; for (FPIter fpIter : fp) { @@ -151,22 +150,23 @@ public List> requiredFormulaResultAnnotations() public void addWriteCompoundSummary(ProjectWriter writer, @NotNull CompoundContainer exp, List> results) throws IOException { if (!results.isEmpty()) { if (rowsBySiriusScore != null) - addToRows(rowsBySiriusScore, FormulaScoring.reRankBy(results, List.of(ZodiacScore.class, SiriusScore.class, TreeScore.class, IsotopeScore.class), true), false); + addToRows(rowsBySiriusScore, results.stream().map(SScored::getCandidate).sorted(FormulaSummaryWriter.FROMULA_COMPARATOR).toList(), false); if (rowsByCSIScore != null) { if (results.stream().anyMatch(it -> it.getCandidate().hasAnnotation(FBCandidates.class))) numStructureResults.incrementAndGet(); - addToRows(rowsByCSIScore, FormulaScoring.reRankBy(results, List.of(TopCSIScore.class, ZodiacScore.class, SiriusScore.class, TreeScore.class, IsotopeScore.class), true), false); + addToRows(rowsByCSIScore, FormulaScoring.reRankBy(results, List.of(TopCSIScore.class, ZodiacScore.class, SiriusScore.class, TreeScore.class, IsotopeScore.class), true) + .stream().map(SScored::getCandidate).toList(), false); } if (rowsBySiriusScoreAll != null) - addToRows(rowsBySiriusScoreAll, FormulaScoring.reRankBy(results, List.of(ZodiacScore.class, SiriusScore.class, TreeScore.class, IsotopeScore.class), true), true); + addToRows(rowsBySiriusScoreAll, results.stream().map(SScored::getCandidate).sorted(FormulaSummaryWriter.FROMULA_COMPARATOR).toList(), true); } } - private void addToRows(List rows, List> results, boolean all) { + private void addToRows(List rows, List results, boolean all) { // sometimes we have multiple results with same score (adducts!). In this case, we list all of them in // a separate summary file int i = 0; - SScored hit; + FormulaResult hit; ArrayList cfFingerprints = new ArrayList<>(); ArrayList npcFingerprints = new ArrayList<>(); ArrayList formulas = new ArrayList<>(), preForms = new ArrayList<>(); @@ -174,8 +174,8 @@ private void addToRows(List rows, List cr = hit.getCandidate().getAnnotation(CanopusResult.class); + id = hit.getId(); + final Optional cr = hit.getAnnotation(CanopusResult.class); final var cid = id; cr.ifPresent(canopusResult -> { cfFingerprints.add(canopusResult.getCanopusFingerprint()); @@ -185,7 +185,7 @@ private void addToRows(List rows, List 0) { lock.writeLock().lock(); try { diff --git a/sirius_cli/src/main/java/de/unijena/bioinf/projectspace/summaries/FormulaSummaryWriter.java b/sirius_cli/src/main/java/de/unijena/bioinf/projectspace/summaries/FormulaSummaryWriter.java index 1efdedb16a..2a198d0294 100644 --- a/sirius_cli/src/main/java/de/unijena/bioinf/projectspace/summaries/FormulaSummaryWriter.java +++ b/sirius_cli/src/main/java/de/unijena/bioinf/projectspace/summaries/FormulaSummaryWriter.java @@ -24,10 +24,12 @@ import de.unijena.bioinf.ChemistryBase.chem.MolecularFormula; import de.unijena.bioinf.ChemistryBase.chem.PrecursorIonType; import de.unijena.bioinf.ChemistryBase.chem.RetentionTime; +import de.unijena.bioinf.ChemistryBase.fp.*; import de.unijena.bioinf.ChemistryBase.ms.ft.FTree; import de.unijena.bioinf.ChemistryBase.ms.ft.TreeStatistics; import de.unijena.bioinf.ChemistryBase.utils.Utils; import de.unijena.bioinf.GibbsSampling.ZodiacScore; +import de.unijena.bioinf.canopus.CanopusResult; import de.unijena.bioinf.elgordo.LipidSpecies; import de.unijena.bioinf.fingerid.ConfidenceScore; import de.unijena.bioinf.fingerid.blast.TopCSIScore; @@ -50,16 +52,32 @@ import java.util.stream.Collectors; public class FormulaSummaryWriter extends CandidateSummarizer { + public static double getCanopusScore(FormulaResult r) { + return r.getAnnotation(CanopusResult.class).map(c -> { + ProbabilityFingerprint fp = c.getCanopusFingerprint(); + FingerprintVersion v = fp.getFingerprintVersion(); + if (v instanceof MaskedFingerprintVersion) + v = ((MaskedFingerprintVersion) v).getMaskedFingerprintVersion(); + return fp.getProbability(((ClassyFireFingerprintVersion) v). + getIndexOfMolecularProperty(((ClassyFireFingerprintVersion) v).getPrimaryClass(fp))); + }).orElse(0d); + } final static List> RANKING_SCORES = List.of(ZodiacScore.class, SiriusScore.class, TreeScore.class, IsotopeScore.class, TopCSIScore.class); - final static List> RANKING_SCORES_SELECTING_TOP1 = List.of(ZodiacScore.class, SiriusScore.class, TreeScore.class, IsotopeScore.class); + + public final static Comparator FROMULA_COMPARATOR = ((Comparator) (o1, o2) -> + FormulaScoring.comparingMultiScore(RANKING_SCORES, true) + .compare(o1.getAnnotationOrNull(FormulaScoring.class), + o2.getAnnotationOrNull(FormulaScoring.class)) + ).thenComparing(Comparator.comparing(FormulaSummaryWriter::getCanopusScore).reversed()); + private final Lock lock = new ReentrantLock(); final LinkedHashMap, String> globalTypes = new LinkedHashMap<>(); final Set globalResults; final Set globalResultsWithAdducts; - final Map>> globalResultsAll; + final Map> globalResultsAll; public FormulaSummaryWriter(boolean writeTopHitGlobal, boolean writeTopHitWithAdductsGlobal, boolean writeFullGlobal) { super(writeTopHitGlobal, writeTopHitWithAdductsGlobal, writeFullGlobal); @@ -82,23 +100,18 @@ public void addWriteCompoundSummary(ProjectWriter writer, @NotNull CompoundConta if (formulaResults == null || formulaResults.isEmpty()) return; - List> res = FormulaScoring.reRankBy(formulaResults, RANKING_SCORES, true); - - if (res.isEmpty()) - return; - - List> results = new ArrayList<>(res.size()); + List res = formulaResults.stream().map(SScored::getCandidate).sorted(FROMULA_COMPARATOR).toList(); + List results = new ArrayList<>(res.size()); int rank = 0; MolecularFormula preFormula = null; - for (SScored s : res) { - FormulaResult c = s.getCandidate(); + for (FormulaResult c : res) { if (preFormula == null || !c.getId().getPrecursorFormula().equals(preFormula)) rank++; preFormula = c.getId().getPrecursorFormula(); - results.add(new SScored<>(ResultEntry.of(c, exp, rank), s.getScoreObject())); + results.add(ResultEntry.of(c, exp, rank)); } - List> topResultWithAdducts = extractAllTopScoringResults(results, RANKING_SCORES_SELECTING_TOP1); + List topResultWithAdducts = extractAllTopScoringResults(results); writer.inDirectory(exp.getId().getDirectoryName(), () -> { @@ -107,14 +120,12 @@ public void addWriteCompoundSummary(ProjectWriter writer, @NotNull CompoundConta final AtomicBoolean first = new AtomicBoolean(true); - results.forEach(r -> { - FormulaScoring s = r.getCandidate().getScoring(); - if (first.getAndSet(false)) { - if (globalResults != null) { - ResultEntry bestResult = /*topResultWithAdducts.size() > 1 ? resolveIonizationOnly(r.getCandidate()) :*/ r.getCandidate(); - withLock(() -> this.globalResults.add(bestResult)); - } - } + results.forEach(c -> { + FormulaScoring s = c.getScoring(); + if (first.getAndSet(false)) + if (globalResults != null) + withLock(() -> this.globalResults.add(c)); + s.annotations().forEach((key, value) -> { if (value != null && !value.isNa()) { types.putIfAbsent(value.getClass(), value.name()); @@ -128,10 +139,9 @@ public void addWriteCompoundSummary(ProjectWriter writer, @NotNull CompoundConta types.remove(ConfidenceScore.class); List r = results.stream() .sorted((i1, i2) -> FormulaScoring.comparingMultiScore( - ProjectSpaceManager.scorePriorities() - .stream().filter(types::containsKey).toList()).compare( - i1.getCandidate().scoring, - i2.getCandidate().scoring)).map(SScored::getCandidate) + ProjectSpaceManager.scorePriorities() + .stream().filter(types::containsKey).toList()) + .compare(i1.scoring, i2.scoring)) .toList(); writeCSV(w, types, r, false); }); @@ -142,24 +152,19 @@ public void addWriteCompoundSummary(ProjectWriter writer, @NotNull CompoundConta if (globalResultsWithAdducts != null || globalResultsAll != null) withLock(() -> { if (globalResultsWithAdducts != null) - topResultWithAdducts.forEach(r -> this.globalResultsWithAdducts.add(r.getCandidate())); + this.globalResultsWithAdducts.addAll(topResultWithAdducts); if (globalResultsAll != null) - globalResultsAll.put(results.get(0).getCandidate(), new ArrayList<>(results)); + globalResultsAll.put(results.get(0), new ArrayList<>(results)); }); } - private List> extractAllTopScoringResults(List> sortedResults, List> rankingScores) { + private List extractAllTopScoringResults(List sortedResults) { if (sortedResults.isEmpty()) return Collections.emptyList(); if (sortedResults.size() == 1) return Collections.singletonList(sortedResults.get(0)); - SScored best = sortedResults.get(0); - - List> topResultsWithAdducts = sortedResults.stream() - .takeWhile(r -> best.getCandidate().preFormula.equals(r.getCandidate().preFormula)) - .collect(Collectors.toList()); - + ResultEntry best = sortedResults.get(0); //candidates with same score should have the same adduct. - return topResultsWithAdducts; + return sortedResults.stream().takeWhile(r -> best.preFormula.equals(r.preFormula)).toList(); } @Override @@ -184,7 +189,7 @@ public void writeProjectSpaceSummary(ProjectWriter writer) throws IOException { if (globalResultsAll != null) { List all = globalResultsAll.keySet().stream() .sorted(Comparator.comparing(s -> s.dirName, Utils.ALPHANUMERIC_COMPARATOR)) - .flatMap(s -> globalResultsAll.get(s).stream().map(SScored::getCandidate)).toList(); + .flatMap(s -> globalResultsAll.get(s).stream()).toList(); writer.textFile(SummaryLocations.FORMULA_SUMMARY_ALL, w -> writeCSV(w, globalTypes, all, true)); } } finally { @@ -254,7 +259,7 @@ private void writeCSV(Writer w, LinkedHashMap, Str } private static class ResultEntry { - private int formulaRank; + private final int formulaRank; public final FormulaScoring scoring; public final MolecularFormula molecularFormula; public final MolecularFormula preFormula;