diff --git a/sirius_cli/src/main/java/de/unijena/bioinf/ms/frontend/subtools/export/mgf/MgfExporterOptions.java b/sirius_cli/src/main/java/de/unijena/bioinf/ms/frontend/subtools/export/mgf/MgfExporterOptions.java index ebe6227aed..e144723b0b 100644 --- a/sirius_cli/src/main/java/de/unijena/bioinf/ms/frontend/subtools/export/mgf/MgfExporterOptions.java +++ b/sirius_cli/src/main/java/de/unijena/bioinf/ms/frontend/subtools/export/mgf/MgfExporterOptions.java @@ -49,6 +49,9 @@ public MgfExporterOptions() { @CommandLine.Option(names = "--merge-ms2", description = "Merge all MS2 of a compound into one single spectrum.") public boolean mergeMs2; + @CommandLine.Option(names = "--feature-id", description = "If available use the feature ids from the input data instead of the SIRIUS internal id. internal id will be used as fallback if the given feature ids are not available or contain duplicates.") + public boolean featureId; + @CommandLine.Option(names = "--quant-table", description = "Quantification table file name for Feature Based Molecular Networking.") public File quantTable; diff --git a/sirius_cli/src/main/java/de/unijena/bioinf/ms/frontend/subtools/export/mgf/MgfExporterWorkflow.java b/sirius_cli/src/main/java/de/unijena/bioinf/ms/frontend/subtools/export/mgf/MgfExporterWorkflow.java index 357bd501f6..911db68054 100644 --- a/sirius_cli/src/main/java/de/unijena/bioinf/ms/frontend/subtools/export/mgf/MgfExporterWorkflow.java +++ b/sirius_cli/src/main/java/de/unijena/bioinf/ms/frontend/subtools/export/mgf/MgfExporterWorkflow.java @@ -28,10 +28,12 @@ import de.unijena.bioinf.ChemistryBase.ms.lcms.QuantificationMeasure; import de.unijena.bioinf.ChemistryBase.ms.lcms.QuantificationTable; import de.unijena.bioinf.ChemistryBase.utils.FileUtils; +import de.unijena.bioinf.ChemistryBase.utils.Utils; import de.unijena.bioinf.babelms.mgf.MgfWriter; import de.unijena.bioinf.ms.frontend.subtools.PreprocessingJob; import de.unijena.bioinf.ms.frontend.workflow.Workflow; import de.unijena.bioinf.ms.properties.ParameterConfig; +import de.unijena.bioinf.projectspace.CompoundContainerId; import de.unijena.bioinf.projectspace.Instance; import de.unijena.bioinf.projectspace.ProjectSpaceManager; import org.apache.commons.text.translate.CsvTranslators; @@ -44,7 +46,9 @@ import java.nio.file.Path; import java.util.*; import java.util.concurrent.ExecutionException; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Consumer; /** * Standalone-Tool to export spectra to mgf format. @@ -55,6 +59,8 @@ public class MgfExporterWorkflow implements Workflow { private final PreprocessingJob> ppj; private final Optional quantPath; + private final AtomicBoolean useFeatureId; + public MgfExporterWorkflow(PreprocessingJob> ppj, MgfExporterOptions options, ParameterConfig config) { outputPath = options.output; @@ -62,6 +68,7 @@ public MgfExporterWorkflow(PreprocessingJob> ppj, M mgfWriter = new MgfWriter(options.writeMs1, options.mergeMs2, mergeMs2Deviation, true); this.ppj = ppj; this.quantPath = Optional.ofNullable(options.quantTable).map(File::toPath); + this.useFeatureId = new AtomicBoolean(options.featureId); } @@ -69,22 +76,43 @@ public MgfExporterWorkflow(PreprocessingJob> ppj, M public void run() { try { final Iterable ps = SiriusJobs.getGlobalJobManager().submitJob(ppj).awaitResult(); - final boolean zeroIndex; - if (ps instanceof ProjectSpaceManager) { - zeroIndex = ((ProjectSpaceManager) ps).projectSpace().getMinIndex().orElse(1) <= 0; - } else { + boolean zeroIndex; + { final AtomicInteger minIndex = new AtomicInteger(Integer.MAX_VALUE); - ps.forEach(i -> minIndex.set(Math.min(minIndex.get(),i.getID().getCompoundIndex()))); + final AtomicInteger size = new AtomicInteger(0); + final Set ids = new HashSet<>(); + final Consumer checkId = id -> { + size.incrementAndGet(); + minIndex.set(Math.min(minIndex.get(), id.getCompoundIndex())); + if (useFeatureId.get()) + id.getFeatureId().ifPresentOrElse(ids::add, (() -> useFeatureId.set(false))); + }; + + if (ps instanceof ProjectSpaceManager) + ((ProjectSpaceManager) ps).projectSpace().forEach(checkId); + else + ps.forEach(i -> checkId.accept(i.getID())); + + if (ids.size() < size.get()) + useFeatureId.set(false); zeroIndex = minIndex.get() <= 0; + if (useFeatureId.get()){ + LoggerFactory.getLogger(getClass()).info("Using imported/generated feature ids."); + }else { + LoggerFactory.getLogger(getClass()).info("Using SIRIUS internal IDs as feature ids."); + if (zeroIndex) + LoggerFactory.getLogger(getClass()).warn("Index value 0 found (old project-space format). Using index + 1 as Feature ID to make them compatible with GNPS FBMN."); + } } - if (zeroIndex) - LoggerFactory.getLogger("Index value 0 found (old project-space format). Using index + 1 as Feature ID to be compatible with GNPS FBMN."); try (final BufferedWriter writer = Files.newBufferedWriter(outputPath)) { for (Instance inst : ps) { try { - mgfWriter.write(writer, inst.getExperiment(), String.valueOf(zeroIndex ? inst.getID().getCompoundIndex() + 1 : inst.getID().getCompoundIndex())); + final String fid = useFeatureId.get() && inst.getID().getFeatureId().isPresent() + ? inst.getID().getFeatureId().get() + : extractFid(inst, zeroIndex); + mgfWriter.write(writer, inst.getExperiment(), fid); } catch (IOException e) { throw e; } catch (Exception e) { @@ -109,6 +137,10 @@ public void run() { } } + private String extractFid(Instance inst, boolean zeroIndex){ + return String.valueOf(zeroIndex ? inst.getID().getCompoundIndex() + 1 : inst.getID().getCompoundIndex()); + } + private void writeQuantifiactionTable(Iterable ps, Path path, boolean zeroIndex) throws IOException { final HashMap compounds = new HashMap<>(); final Set sampleNames = new HashSet<>(); @@ -118,8 +150,11 @@ private void writeQuantifiactionTable(Iterable ps, Path path, boolean final Ms2Experiment experiment = i.getExperiment(); getQuantificationTable(i, experiment).ifPresent(quant -> { for (int j = 0; j < quant.length(); ++j) sampleNames.add(quant.getName(j)); - String id = String.valueOf(zeroIndex ? i.getID().getCompoundIndex() + 1 : i.getID().getCompoundIndex()); - compounds.put(id, new QuantInfo( + final String fid = useFeatureId.get() && i.getID().getFeatureId().isPresent() + ? i.getID().getFeatureId().get() + : extractFid(i, zeroIndex); + + compounds.put(fid, new QuantInfo( experiment.getIonMass(), experiment.getAnnotation(RetentionTime.class).orElse(new RetentionTime(0d)).getRetentionTimeInSeconds() / 60d, //use min quant @@ -132,9 +167,9 @@ private void writeQuantifiactionTable(Iterable ps, Path path, boolean // now write data ArrayList compoundNames = new ArrayList<>(compounds.keySet()); - Collections.sort(compoundNames); + compoundNames.sort(Utils.ALPHANUMERIC_COMPARATOR); ArrayList sampleNameList = new ArrayList<>(sampleNames); - Collections.sort(sampleNameList); + sampleNameList.sort(Utils.ALPHANUMERIC_COMPARATOR); bw.write("row ID,row m/z,row retention time"); CsvTranslators.CsvEscaper escaper = new CsvTranslators.CsvEscaper(); for (String sample : sampleNameList) { @@ -170,7 +205,7 @@ private Optional getQuantificationTable(Instance i, Ms2Expe return lcms.isEmpty() ? Optional.empty() : Optional.of(lcms.getQuantificationTable()); } - private String toQuantSuffix(QuantificationMeasure m){ + private String toQuantSuffix(QuantificationMeasure m) { return switch (m) { case APEX -> " Peak height"; case INTEGRAL, INTEGRAL_FWHMD -> " Peak area"; diff --git a/sirius_cli/src/main/java/de/unijena/bioinf/projectspace/summaries/CanopusSummaryWriter.java b/sirius_cli/src/main/java/de/unijena/bioinf/projectspace/summaries/CanopusSummaryWriter.java index 392d62ad01..cd2423f933 100644 --- a/sirius_cli/src/main/java/de/unijena/bioinf/projectspace/summaries/CanopusSummaryWriter.java +++ b/sirius_cli/src/main/java/de/unijena/bioinf/projectspace/summaries/CanopusSummaryWriter.java @@ -55,12 +55,13 @@ protected static class CanopusSummaryRow { private final PrecursorIonType[] ionTypes; private final String id; + private final String featureId; private final int best; private ClassyFireFingerprintVersion CLF; NPCFingerprintVersion NPCF; - public CanopusSummaryRow(ProbabilityFingerprint[] cfClassifications, ProbabilityFingerprint[] npcClassifications, MolecularFormula[] molecularFormulas, MolecularFormula[] precursorFormulas, PrecursorIonType[] ionTypes, String id) { + public CanopusSummaryRow(ProbabilityFingerprint[] cfClassifications, ProbabilityFingerprint[] npcClassifications, MolecularFormula[] molecularFormulas, MolecularFormula[] precursorFormulas, PrecursorIonType[] ionTypes, String id, String featureId) { this.cfClassifications = cfClassifications; this.npcClassifications = npcClassifications; this.molecularFormulas = molecularFormulas; @@ -68,6 +69,7 @@ public CanopusSummaryRow(ProbabilityFingerprint[] cfClassifications, Probability this.mostSpecificClasses = new ClassyfireProperty[molecularFormulas.length]; this.ionTypes = ionTypes; this.id = id; + this.featureId = featureId; this.best = chooseBestAndAssignPrimaryClasses(cfClassifications); bestNPCProps = new NPCFingerprintVersion.NPCProperty[molecularFormulas.length][3]; @@ -186,7 +188,8 @@ private void addToRows(List rows, List { int k = 0; @@ -299,6 +302,8 @@ public String[] next() { cols[i++] = Joiner.on("; ").join(row.cfClassifications[row.best].asDeterministic().asArray().presentFingerprints().asMolecularPropertyIterator()); // cols[i++] = Joiner.on("; ").join(row.npcClassifications[row.best].asDeterministic().asArray().presentFingerprints().asMolecularPropertyIterator()); + + cols[i++] = row.featureId; ++k; return cols; @@ -384,6 +389,9 @@ public String[] next() { cols[i++] = Joiner.on("; ").join(row.cfClassifications[j].asDeterministic().asArray().presentFingerprints().asMolecularPropertyIterator()); // cols[i++] = Joiner.on("; ").join(row.npcClassifications[j].asDeterministic().asArray().presentFingerprints().asMolecularPropertyIterator()); + cols[i++] = row.featureId; + + ++j; if (j >= rows.get(k).cfClassifications.length) { j = 0; diff --git a/sirius_cli/src/main/java/de/unijena/bioinf/projectspace/summaries/FormulaSummaryWriter.java b/sirius_cli/src/main/java/de/unijena/bioinf/projectspace/summaries/FormulaSummaryWriter.java index 45036aea18..75f5394b9d 100644 --- a/sirius_cli/src/main/java/de/unijena/bioinf/projectspace/summaries/FormulaSummaryWriter.java +++ b/sirius_cli/src/main/java/de/unijena/bioinf/projectspace/summaries/FormulaSummaryWriter.java @@ -197,7 +197,7 @@ private void writeCSV(Writer w, LinkedHashMap, Str String header = makeHeader(scoreOrder.stream().map(types::get).collect(Collectors.joining("\t"))); if (suffix) - header = header + "\tionMass" + "\tretentionTimeInSeconds" + "\tid"; + header = header + "\tionMass" + "\tretentionTimeInSeconds" + "\tid" + "\tfeatureId"; w.write("rank\tformulaRank\t" + header + "\n"); @@ -245,6 +245,8 @@ private void writeCSV(Writer w, LinkedHashMap, Str w.write(r.retentionTimeSeconds); w.write('\t'); w.write(r.dirName); + w.write('\t'); + w.write(r.featureId); } w.write('\n'); @@ -269,6 +271,8 @@ private static class ResultEntry { public String lipidClass = ""; + public String featureId; + public ResultEntry(FormulaResult r, CompoundContainer exp, int formulaRank) { this.formulaRank = formulaRank; scoring = r.getAnnotationOrThrow(FormulaScoring.class); @@ -289,6 +293,7 @@ public ResultEntry(FormulaResult r, CompoundContainer exp, int formulaRank) { ionMass = BigDecimal.valueOf(exp.getId().getIonMass().orElse(Double.NaN)).setScale(5, RoundingMode.HALF_UP).toString(); retentionTimeSeconds = String.valueOf(exp.getId().getRt().orElse(RetentionTime.NA()).getRetentionTimeInSeconds()); dirName = exp.getId().getDirectoryName(); + featureId = exp.getId().getFeatureId().orElse("N/A"); } public FormulaScoring getScoring() { diff --git a/sirius_cli/src/main/java/de/unijena/bioinf/projectspace/summaries/StructureSummaryWriter.java b/sirius_cli/src/main/java/de/unijena/bioinf/projectspace/summaries/StructureSummaryWriter.java index bef09be2d6..a88db1eaf2 100644 --- a/sirius_cli/src/main/java/de/unijena/bioinf/projectspace/summaries/StructureSummaryWriter.java +++ b/sirius_cli/src/main/java/de/unijena/bioinf/projectspace/summaries/StructureSummaryWriter.java @@ -238,11 +238,11 @@ static Hit toHit(CompoundContainerId id, SScored s.getAnnotationOr(ZodiacScore.class, FormulaScore::NA)).orElse(FormulaScore.NA(ZodiacScore.class)); - return new Hit(confidence + "\t" + line.get(0) + "\t" + zodiacScore + "\t" + siriusScore + "\t" + String.join("\t", line.subList(1, line.size())) + "\t" + id.getIonMass().orElse(Double.NaN) + "\t" + id.getRt().orElse(RetentionTime.NA()).getRetentionTimeInSeconds() + "\t" + id.getDirectoryName() + "\n", confidence, csiScore, formulaRank); + return new Hit(confidence + "\t" + line.get(0) + "\t" + zodiacScore + "\t" + siriusScore + "\t" + String.join("\t", line.subList(1, line.size())) + "\t" + id.getIonMass().orElse(Double.NaN) + "\t" + id.getRt().orElse(RetentionTime.NA()).getRetentionTimeInSeconds() + "\t" + id.getDirectoryName(), confidence, csiScore, formulaRank, id.getFeatureId().orElse("N/A")); } static void write(BufferedWriter w, List data) throws IOException { - w.write("rank\t" + "formulaRank\t" + "#adducts\t" + "#predictedFPs\t" + new ConfidenceScore(0).name() + "\t" + StructureCSVExporter.HEADER_LIST.get(0) + "\t" + new ZodiacScore(0).name() + "\t" + new SiriusScore(0).name() + "\t" + String.join("\t", StructureCSVExporter.HEADER_LIST.subList(1, StructureCSVExporter.HEADER_LIST.size())) + "\t" + "ionMass\t" + "retentionTimeInSeconds\t" + "id" + "\n"); + w.write("rank\t" + "formulaRank\t" + "#adducts\t" + "#predictedFPs\t" + new ConfidenceScore(0).name() + "\t" + StructureCSVExporter.HEADER_LIST.get(0) + "\t" + new ZodiacScore(0).name() + "\t" + new SiriusScore(0).name() + "\t" + String.join("\t", StructureCSVExporter.HEADER_LIST.subList(1, StructureCSVExporter.HEADER_LIST.size())) + "\t" + "ionMass\t" + "retentionTimeInSeconds\t" + "id\t" + "featureId" + "\n"); int rank = 0; for (Hit s : data) { w.write(String.valueOf(++rank)); @@ -254,6 +254,9 @@ static void write(BufferedWriter w, List data) throws IOException { w.write(String.valueOf(s.numberOfFps)); w.write("\t"); w.write(s.line); + w.write("\t"); + w.write(s.featureId); + w.write("\n"); } } @@ -264,12 +267,14 @@ static class Hit { final int formulaRank; int numberOfAdducts = 1; int numberOfFps = 1; + final String featureId; - Hit(String line, ConfidenceScore confidenceScore, double csiScore, int formulaRank) { + Hit(String line, ConfidenceScore confidenceScore, double csiScore, int formulaRank, String featureId) { this.line = line; this.confidenceScore = confidenceScore; this.csiScore = csiScore; this.formulaRank = formulaRank; + this.featureId = featureId; } static Comparator compareByConfidence() { diff --git a/sirius_cli/src/main/resources/sirius_frontend.build.properties b/sirius_cli/src/main/resources/sirius_frontend.build.properties index c5e4af5654..cd0f75bc27 100644 --- a/sirius_cli/src/main/resources/sirius_frontend.build.properties +++ b/sirius_cli/src/main/resources/sirius_frontend.build.properties @@ -1,7 +1,7 @@ #here you can provide properties that may be needed during build- AND during runtime and should not be editable by the user at runtime -de.unijena.bioinf.siriusFrontend.version=5.7.4-SNAPSHOT -de.unijena.bioinf.sirius.version=4.12.14 -de.unijena.bioinf.fingerid.version=2.6.14 +de.unijena.bioinf.siriusFrontend.version=5.8.0 +de.unijena.bioinf.sirius.version=4.13.0 +de.unijena.bioinf.fingerid.version=2.6.15 # de.unijena.bioinf.sirius.http.job.fingerprint.limit = 5000 de.unijena.bioinf.sirius.http.job.canopus.limit = 5000 diff --git a/sirius_gui/src/main/java/de/unijena/bioinf/ms/gui/subtools/export/mgf/MgfExporterConfigPanel.java b/sirius_gui/src/main/java/de/unijena/bioinf/ms/gui/subtools/export/mgf/MgfExporterConfigPanel.java index abab86a324..e2fb6358dc 100644 --- a/sirius_gui/src/main/java/de/unijena/bioinf/ms/gui/subtools/export/mgf/MgfExporterConfigPanel.java +++ b/sirius_gui/src/main/java/de/unijena/bioinf/ms/gui/subtools/export/mgf/MgfExporterConfigPanel.java @@ -29,6 +29,11 @@ public MgfExporterConfigPanel() { getOptionDescriptionByName("merge-ms2").ifPresent(it -> mergeMs2.setToolTipText(GuiUtils.formatToolTip(it))); paras.add(mergeMs2); + JCheckBox featureId = new JCheckBox("Feature ID?", true); + parameterBindings.put("feature-id", () -> "~" + featureId.isSelected()); + getOptionDescriptionByName("feature-id").ifPresent(it -> featureId.setToolTipText(GuiUtils.formatToolTip(it))); + paras.add(featureId); + //merge-ppm final String buf = "merge-ppm"; JSpinner mergePpm = makeGenericOptionSpinner(buf,