Skip to content

Commit

Permalink
Fix CANOPUS and formula summary writing by using the CANOPUS result as a tie-breaker
Browse files Browse the repository at this point in the history
(cherry picked from commit fe443a8)
  • Loading branch information
mfleisch committed Jan 26, 2024
1 parent 6d9abc8 commit 7f05cd9
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 45 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ private void chooseBestNPCAssignments(ProbabilityFingerprint[] npcClassification
FingerprintVersion v = npcClassifications[0].getFingerprintVersion();
if (v instanceof MaskedFingerprintVersion) v = ((MaskedFingerprintVersion) v).getMaskedFingerprintVersion();
NPCF = (NPCFingerprintVersion) v;
//todo do we have to perform index mapping?
for (int i = 0; i < npcClassifications.length; ++i) {
ProbabilityFingerprint fp = npcClassifications[i];
for (FPIter fpIter : fp) {
Expand Down Expand Up @@ -151,31 +150,32 @@ public List<Class<? extends DataAnnotation>> requiredFormulaResultAnnotations()
public void addWriteCompoundSummary(ProjectWriter writer, @NotNull CompoundContainer exp, List<? extends SScored<FormulaResult, ? extends FormulaScore>> results) throws IOException {
if (!results.isEmpty()) {
if (rowsBySiriusScore != null)
addToRows(rowsBySiriusScore, FormulaScoring.reRankBy(results, List.of(ZodiacScore.class, SiriusScore.class, TreeScore.class, IsotopeScore.class), true), false);
addToRows(rowsBySiriusScore, results.stream().map(SScored::getCandidate).sorted(FormulaSummaryWriter.FROMULA_COMPARATOR).toList(), false);
if (rowsByCSIScore != null) {
if (results.stream().anyMatch(it -> it.getCandidate().hasAnnotation(FBCandidates.class)))
numStructureResults.incrementAndGet();
addToRows(rowsByCSIScore, FormulaScoring.reRankBy(results, List.of(TopCSIScore.class, ZodiacScore.class, SiriusScore.class, TreeScore.class, IsotopeScore.class), true), false);
addToRows(rowsByCSIScore, FormulaScoring.reRankBy(results, List.of(TopCSIScore.class, ZodiacScore.class, SiriusScore.class, TreeScore.class, IsotopeScore.class), true)
.stream().map(SScored::getCandidate).toList(), false);
}
if (rowsBySiriusScoreAll != null)
addToRows(rowsBySiriusScoreAll, FormulaScoring.reRankBy(results, List.of(ZodiacScore.class, SiriusScore.class, TreeScore.class, IsotopeScore.class), true), true);
addToRows(rowsBySiriusScoreAll, results.stream().map(SScored::getCandidate).sorted(FormulaSummaryWriter.FROMULA_COMPARATOR).toList(), true);
}
}

private void addToRows(List<CanopusSummaryRow> rows, List<? extends SScored<FormulaResult, ? extends FormulaScore>> results, boolean all) {
private void addToRows(List<CanopusSummaryRow> rows, List<FormulaResult> results, boolean all) {
// sometimes we have multiple results with same score (adducts!). In this case, we list all of them in
// a separate summary file
int i = 0;
SScored<FormulaResult, ? extends FormulaScore> hit;
FormulaResult hit;
ArrayList<ProbabilityFingerprint> cfFingerprints = new ArrayList<>();
ArrayList<ProbabilityFingerprint> npcFingerprints = new ArrayList<>();
ArrayList<MolecularFormula> formulas = new ArrayList<>(), preForms = new ArrayList<>();
ArrayList<PrecursorIonType> ionTypes = new ArrayList<>();
FormulaResultId id;
do {
hit = results.get(i);
id = hit.getCandidate().getId();
final Optional<CanopusResult> cr = hit.getCandidate().getAnnotation(CanopusResult.class);
id = hit.getId();
final Optional<CanopusResult> cr = hit.getAnnotation(CanopusResult.class);
final var cid = id;
cr.ifPresent(canopusResult -> {
cfFingerprints.add(canopusResult.getCanopusFingerprint());
Expand All @@ -185,7 +185,7 @@ private void addToRows(List<CanopusSummaryRow> rows, List<? extends SScored<Form
preForms.add(cid.getPrecursorFormula());
});
++i;
} while (i < results.size() && (results.get(i).getCandidate().getId().getPrecursorFormula().equals(results.get(0).getCandidate().getId().getPrecursorFormula()) || all));
} while (i < results.size() && (results.get(i).getId().getPrecursorFormula().equals(results.get(0).getId().getPrecursorFormula()) || all));
if (cfFingerprints.size() > 0) {
lock.writeLock().lock();
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,12 @@
import de.unijena.bioinf.ChemistryBase.chem.MolecularFormula;
import de.unijena.bioinf.ChemistryBase.chem.PrecursorIonType;
import de.unijena.bioinf.ChemistryBase.chem.RetentionTime;
import de.unijena.bioinf.ChemistryBase.fp.*;
import de.unijena.bioinf.ChemistryBase.ms.ft.FTree;
import de.unijena.bioinf.ChemistryBase.ms.ft.TreeStatistics;
import de.unijena.bioinf.ChemistryBase.utils.Utils;
import de.unijena.bioinf.GibbsSampling.ZodiacScore;
import de.unijena.bioinf.canopus.CanopusResult;
import de.unijena.bioinf.elgordo.LipidSpecies;
import de.unijena.bioinf.fingerid.ConfidenceScore;
import de.unijena.bioinf.fingerid.blast.TopCSIScore;
Expand All @@ -50,16 +52,32 @@
import java.util.stream.Collectors;

public class FormulaSummaryWriter extends CandidateSummarizer {
/**
 * Returns the probability that the CANOPUS fingerprint of a formula result assigns to its
 * primary ClassyFire class.
 * <p>
 * The fingerprint version is unwrapped first if it is a {@link MaskedFingerprintVersion}; the
 * resulting version is expected to be a {@link ClassyFireFingerprintVersion} (a
 * {@link ClassCastException} is thrown otherwise).
 *
 * @param r the formula result to inspect
 * @return the probability stored in the CANOPUS fingerprint at the index of the primary class,
 *         or {@code 0} if {@code r} carries no {@link CanopusResult} annotation
 */
public static double getCanopusScore(FormulaResult r) {
    final var canopus = r.getAnnotation(CanopusResult.class);
    // No CANOPUS prediction attached: fall back to the neutral score.
    if (!canopus.isPresent())
        return 0d;

    final ProbabilityFingerprint fingerprint = canopus.get().getCanopusFingerprint();

    // Resolve the underlying (unmasked) fingerprint version before casting.
    FingerprintVersion version = fingerprint.getFingerprintVersion();
    if (version instanceof MaskedFingerprintVersion)
        version = ((MaskedFingerprintVersion) version).getMaskedFingerprintVersion();

    final ClassyFireFingerprintVersion classyFire = (ClassyFireFingerprintVersion) version;
    final int primaryClassIndex = classyFire.getIndexOfMolecularProperty(classyFire.getPrimaryClass(fingerprint));
    return fingerprint.getProbability(primaryClassIndex);
}

// Score classes handed to FormulaScoring.comparingMultiScore by FROMULA_COMPARATOR below.
final static List<Class<? extends FormulaScore>> RANKING_SCORES = List.of(ZodiacScore.class, SiriusScore.class, TreeScore.class, IsotopeScore.class, TopCSIScore.class);
// Same list as RANKING_SCORES but without TopCSIScore.
final static List<Class<? extends FormulaScore>> RANKING_SCORES_SELECTING_TOP1 = List.of(ZodiacScore.class, SiriusScore.class, TreeScore.class, IsotopeScore.class);

/**
 * Orders formula results by comparing their {@link FormulaScoring} annotations with the
 * multi-score comparator built from {@link #RANKING_SCORES}; results that compare as equal
 * there are ordered by {@link #getCanopusScore(FormulaResult)}, highest CANOPUS score first.
 * <p>
 * NOTE(review): the {@code true} flag passed to {@code comparingMultiScore} presumably selects
 * descending (best-first) order -- confirm against FormulaScoring. The scoring annotation is
 * fetched with {@code getAnnotationOrNull}, so the multi-score comparator may receive
 * {@code null} arguments.
 */
public final static Comparator<FormulaResult> FROMULA_COMPARATOR = ((Comparator<FormulaResult>) (o1, o2) ->
FormulaScoring.comparingMultiScore(RANKING_SCORES, true)
.compare(o1.getAnnotationOrNull(FormulaScoring.class),
o2.getAnnotationOrNull(FormulaScoring.class))
// tie-breaker: prefer the candidate with the higher CANOPUS primary-class probability
).thenComparing(Comparator.comparing(FormulaSummaryWriter::getCanopusScore).reversed());


private final Lock lock = new ReentrantLock();

final LinkedHashMap<Class<? extends FormulaScore>, String> globalTypes = new LinkedHashMap<>();
final Set<ResultEntry> globalResults;
final Set<ResultEntry> globalResultsWithAdducts;
final Map<ResultEntry, List<SScored<ResultEntry, ? extends FormulaScore>>> globalResultsAll;
final Map<ResultEntry, List<ResultEntry>> globalResultsAll;

public FormulaSummaryWriter(boolean writeTopHitGlobal, boolean writeTopHitWithAdductsGlobal, boolean writeFullGlobal) {
super(writeTopHitGlobal, writeTopHitWithAdductsGlobal, writeFullGlobal);
Expand All @@ -82,23 +100,18 @@ public void addWriteCompoundSummary(ProjectWriter writer, @NotNull CompoundConta
if (formulaResults == null || formulaResults.isEmpty())
return;

List<SScored<FormulaResult, ? extends FormulaScore>> res = FormulaScoring.reRankBy(formulaResults, RANKING_SCORES, true);

if (res.isEmpty())
return;

List<SScored<ResultEntry, ? extends FormulaScore>> results = new ArrayList<>(res.size());
List<FormulaResult> res = formulaResults.stream().map(SScored::getCandidate).sorted(FROMULA_COMPARATOR).toList();
List<ResultEntry> results = new ArrayList<>(res.size());
int rank = 0;
MolecularFormula preFormula = null;
for (SScored<FormulaResult, ? extends FormulaScore> s : res) {
FormulaResult c = s.getCandidate();
for (FormulaResult c : res) {
if (preFormula == null || !c.getId().getPrecursorFormula().equals(preFormula))
rank++;
preFormula = c.getId().getPrecursorFormula();

results.add(new SScored<>(ResultEntry.of(c, exp, rank), s.getScoreObject()));
results.add(ResultEntry.of(c, exp, rank));
}
List<? extends SScored<ResultEntry, ? extends FormulaScore>> topResultWithAdducts = extractAllTopScoringResults(results, RANKING_SCORES_SELECTING_TOP1);
List<ResultEntry> topResultWithAdducts = extractAllTopScoringResults(results);


writer.inDirectory(exp.getId().getDirectoryName(), () -> {
Expand All @@ -107,14 +120,12 @@ public void addWriteCompoundSummary(ProjectWriter writer, @NotNull CompoundConta

final AtomicBoolean first = new AtomicBoolean(true);

results.forEach(r -> {
FormulaScoring s = r.getCandidate().getScoring();
if (first.getAndSet(false)) {
if (globalResults != null) {
ResultEntry bestResult = /*topResultWithAdducts.size() > 1 ? resolveIonizationOnly(r.getCandidate()) :*/ r.getCandidate();
withLock(() -> this.globalResults.add(bestResult));
}
}
results.forEach(c -> {
FormulaScoring s = c.getScoring();
if (first.getAndSet(false))
if (globalResults != null)
withLock(() -> this.globalResults.add(c));

s.annotations().forEach((key, value) -> {
if (value != null && !value.isNa()) {
types.putIfAbsent(value.getClass(), value.name());
Expand All @@ -128,10 +139,9 @@ public void addWriteCompoundSummary(ProjectWriter writer, @NotNull CompoundConta
types.remove(ConfidenceScore.class);
List<ResultEntry> r = results.stream()
.sorted((i1, i2) -> FormulaScoring.comparingMultiScore(
ProjectSpaceManager.scorePriorities()
.stream().filter(types::containsKey).toList()).compare(
i1.getCandidate().scoring,
i2.getCandidate().scoring)).map(SScored::getCandidate)
ProjectSpaceManager.scorePriorities()
.stream().filter(types::containsKey).toList())
.compare(i1.scoring, i2.scoring))
.toList();
writeCSV(w, types, r, false);
});
Expand All @@ -142,24 +152,19 @@ public void addWriteCompoundSummary(ProjectWriter writer, @NotNull CompoundConta
if (globalResultsWithAdducts != null || globalResultsAll != null)
withLock(() -> {
if (globalResultsWithAdducts != null)
topResultWithAdducts.forEach(r -> this.globalResultsWithAdducts.add(r.getCandidate()));
this.globalResultsWithAdducts.addAll(topResultWithAdducts);
if (globalResultsAll != null)
globalResultsAll.put(results.get(0).getCandidate(), new ArrayList<>(results));
globalResultsAll.put(results.get(0), new ArrayList<>(results));
});
}

private List<SScored<ResultEntry, ? extends FormulaScore>> extractAllTopScoringResults(List<? extends SScored<ResultEntry, ? extends FormulaScore>> sortedResults, List<Class<? extends FormulaScore>> rankingScores) {
private List<ResultEntry> extractAllTopScoringResults(List<ResultEntry> sortedResults) {
if (sortedResults.isEmpty()) return Collections.emptyList();
if (sortedResults.size() == 1) return Collections.singletonList(sortedResults.get(0));

SScored<ResultEntry, ? extends FormulaScore> best = sortedResults.get(0);

List<SScored<ResultEntry, ? extends FormulaScore>> topResultsWithAdducts = sortedResults.stream()
.takeWhile(r -> best.getCandidate().preFormula.equals(r.getCandidate().preFormula))
.collect(Collectors.toList());

ResultEntry best = sortedResults.get(0);
//candidates with same score should have the same adduct.
return topResultsWithAdducts;
return sortedResults.stream().takeWhile(r -> best.preFormula.equals(r.preFormula)).toList();
}

@Override
Expand All @@ -184,7 +189,7 @@ public void writeProjectSpaceSummary(ProjectWriter writer) throws IOException {
if (globalResultsAll != null) {
List<ResultEntry> all = globalResultsAll.keySet().stream()
.sorted(Comparator.comparing(s -> s.dirName, Utils.ALPHANUMERIC_COMPARATOR))
.flatMap(s -> globalResultsAll.get(s).stream().map(SScored::getCandidate)).toList();
.flatMap(s -> globalResultsAll.get(s).stream()).toList();
writer.textFile(SummaryLocations.FORMULA_SUMMARY_ALL, w -> writeCSV(w, globalTypes, all, true));
}
} finally {
Expand Down Expand Up @@ -254,7 +259,7 @@ private void writeCSV(Writer w, LinkedHashMap<Class<? extends FormulaScore>, Str
}

private static class ResultEntry {
private int formulaRank;
private final int formulaRank;
public final FormulaScoring scoring;
public final MolecularFormula molecularFormula;
public final MolecularFormula preFormula;
Expand Down

0 comments on commit 7f05cd9

Please sign in to comment.