Skip to content

Commit

Permalink
added feature id support from several input formats. added feature id…
Browse files Browse the repository at this point in the history
… to project wide summary files
  • Loading branch information
mfleisch committed Jul 1, 2023
1 parent 84b9e8e commit 156a560
Show file tree
Hide file tree
Showing 7 changed files with 85 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ public MgfExporterOptions() {
/**
 * Command line flag {@code --merge-ms2}: request that all MS2 spectra of a
 * compound are merged into one single spectrum on export.
 */
@CommandLine.Option(names = "--merge-ms2", description = "Merge all MS2 of a compound into one single spectrum.")
public boolean mergeMs2;

/**
 * Command line flag {@code --feature-id}: prefer the feature ids that came with
 * the input data over the SIRIUS internal id. Per the option description, the
 * internal id remains the fallback when the provided feature ids are missing
 * or contain duplicates.
 */
@CommandLine.Option(names = "--feature-id", description = "If available use the feature ids from the input data instead of the SIRIUS internal id. internal id will be used as fallback if the given feature ids are not available or contain duplicates.")
public boolean featureId;

/**
 * Command line option {@code --quant-table}: file name of the quantification
 * table for Feature Based Molecular Networking. Stays {@code null} unless the
 * option is given (no default is assigned on this declaration).
 */
@CommandLine.Option(names = "--quant-table", description = "Quantification table file name for Feature Based Molecular Networking.")
public File quantTable;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,12 @@
import de.unijena.bioinf.ChemistryBase.ms.lcms.QuantificationMeasure;
import de.unijena.bioinf.ChemistryBase.ms.lcms.QuantificationTable;
import de.unijena.bioinf.ChemistryBase.utils.FileUtils;
import de.unijena.bioinf.ChemistryBase.utils.Utils;
import de.unijena.bioinf.babelms.mgf.MgfWriter;
import de.unijena.bioinf.ms.frontend.subtools.PreprocessingJob;
import de.unijena.bioinf.ms.frontend.workflow.Workflow;
import de.unijena.bioinf.ms.properties.ParameterConfig;
import de.unijena.bioinf.projectspace.CompoundContainerId;
import de.unijena.bioinf.projectspace.Instance;
import de.unijena.bioinf.projectspace.ProjectSpaceManager;
import org.apache.commons.text.translate.CsvTranslators;
Expand All @@ -44,7 +46,9 @@
import java.nio.file.Path;
import java.util.*;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;

/**
* Standalone-Tool to export spectra to mgf format.
Expand All @@ -55,36 +59,60 @@ public class MgfExporterWorkflow implements Workflow {
private final PreprocessingJob<? extends Iterable<Instance>> ppj;
private final Optional<Path> quantPath;

private final AtomicBoolean useFeatureId;


/**
 * Builds the export workflow from the parsed exporter options.
 *
 * @param ppj     preprocessing job that supplies the instances to export
 * @param options parsed command line options; provides the output location, the
 *                MS1/merge settings handed to the {@link MgfWriter}, the optional
 *                quantification table file and the feature id flag
 * @param config  parameter configuration; not read by this constructor
 */
public MgfExporterWorkflow(PreprocessingJob<? extends Iterable<Instance>> ppj, MgfExporterOptions options, ParameterConfig config) {
    this.ppj = ppj;
    this.outputPath = options.output;
    // the ppm deviation is only needed by the writer, so it is built in place
    this.mgfWriter = new MgfWriter(options.writeMs1, options.mergeMs2, new Deviation(options.ppmDev), true);

    // the quantification table is optional: no file given means no path
    final File quantFile = options.quantTable;
    this.quantPath = quantFile == null ? Optional.empty() : Optional.of(quantFile.toPath());

    // mutable holder: the initial value is the user's request from the options
    this.useFeatureId = new AtomicBoolean(options.featureId);
}


@Override
public void run() {
try {
final Iterable<Instance> ps = SiriusJobs.getGlobalJobManager().submitJob(ppj).awaitResult();
final boolean zeroIndex;
if (ps instanceof ProjectSpaceManager) {
zeroIndex = ((ProjectSpaceManager<Instance>) ps).projectSpace().getMinIndex().orElse(1) <= 0;
} else {
boolean zeroIndex;
{
final AtomicInteger minIndex = new AtomicInteger(Integer.MAX_VALUE);
ps.forEach(i -> minIndex.set(Math.min(minIndex.get(),i.getID().getCompoundIndex())));
final AtomicInteger size = new AtomicInteger(0);
final Set<String> ids = new HashSet<>();
final Consumer<CompoundContainerId> checkId = id -> {
size.incrementAndGet();
minIndex.set(Math.min(minIndex.get(), id.getCompoundIndex()));
if (useFeatureId.get())
id.getFeatureId().ifPresentOrElse(ids::add, (() -> useFeatureId.set(false)));
};

if (ps instanceof ProjectSpaceManager)
((ProjectSpaceManager<Instance>) ps).projectSpace().forEach(checkId);
else
ps.forEach(i -> checkId.accept(i.getID()));

if (ids.size() < size.get())
useFeatureId.set(false);
zeroIndex = minIndex.get() <= 0;
if (useFeatureId.get()){
LoggerFactory.getLogger(getClass()).info("Using imported/generated feature ids.");
}else {
LoggerFactory.getLogger(getClass()).info("Using SIRIUS internal IDs as feature ids.");
if (zeroIndex)
LoggerFactory.getLogger(getClass()).warn("Index value 0 found (old project-space format). Using index + 1 as Feature ID to make them compatible with GNPS FBMN.");
}
}

if (zeroIndex)
LoggerFactory.getLogger("Index value 0 found (old project-space format). Using index + 1 as Feature ID to be compatible with GNPS FBMN.");

try (final BufferedWriter writer = Files.newBufferedWriter(outputPath)) {
for (Instance inst : ps) {
try {
mgfWriter.write(writer, inst.getExperiment(), String.valueOf(zeroIndex ? inst.getID().getCompoundIndex() + 1 : inst.getID().getCompoundIndex()));
final String fid = useFeatureId.get() && inst.getID().getFeatureId().isPresent()
? inst.getID().getFeatureId().get()
: extractFid(inst, zeroIndex);
mgfWriter.write(writer, inst.getExperiment(), fid);
} catch (IOException e) {
throw e;
} catch (Exception e) {
Expand All @@ -109,6 +137,10 @@ public void run() {
}
}

/**
 * Derives the feature id string from the compound index of an instance.
 *
 * @param inst      instance whose compound index is used
 * @param zeroIndex if {@code true}, the index is shifted by one before it is
 *                  converted to a string
 * @return the (possibly shifted) compound index as a decimal string
 */
private String extractFid(Instance inst, boolean zeroIndex){
    final int index = inst.getID().getCompoundIndex();
    if (zeroIndex) {
        return String.valueOf(index + 1);
    }
    return String.valueOf(index);
}

private void writeQuantifiactionTable(Iterable<Instance> ps, Path path, boolean zeroIndex) throws IOException {
final HashMap<String, QuantInfo> compounds = new HashMap<>();
final Set<String> sampleNames = new HashSet<>();
Expand All @@ -118,8 +150,11 @@ private void writeQuantifiactionTable(Iterable<Instance> ps, Path path, boolean
final Ms2Experiment experiment = i.getExperiment();
getQuantificationTable(i, experiment).ifPresent(quant -> {
for (int j = 0; j < quant.length(); ++j) sampleNames.add(quant.getName(j));
String id = String.valueOf(zeroIndex ? i.getID().getCompoundIndex() + 1 : i.getID().getCompoundIndex());
compounds.put(id, new QuantInfo(
final String fid = useFeatureId.get() && i.getID().getFeatureId().isPresent()
? i.getID().getFeatureId().get()
: extractFid(i, zeroIndex);

compounds.put(fid, new QuantInfo(
experiment.getIonMass(),
experiment.getAnnotation(RetentionTime.class).orElse(new RetentionTime(0d)).getRetentionTimeInSeconds() / 60d, //use min
quant
Expand All @@ -132,9 +167,9 @@ private void writeQuantifiactionTable(Iterable<Instance> ps, Path path, boolean

// now write data
ArrayList<String> compoundNames = new ArrayList<>(compounds.keySet());
Collections.sort(compoundNames);
compoundNames.sort(Utils.ALPHANUMERIC_COMPARATOR);
ArrayList<String> sampleNameList = new ArrayList<>(sampleNames);
Collections.sort(sampleNameList);
sampleNameList.sort(Utils.ALPHANUMERIC_COMPARATOR);
bw.write("row ID,row m/z,row retention time");
CsvTranslators.CsvEscaper escaper = new CsvTranslators.CsvEscaper();
for (String sample : sampleNameList) {
Expand Down Expand Up @@ -170,7 +205,7 @@ private Optional<QuantificationTable> getQuantificationTable(Instance i, Ms2Expe
return lcms.isEmpty() ? Optional.empty() : Optional.of(lcms.getQuantificationTable());
}

private String toQuantSuffix(QuantificationMeasure m){
private String toQuantSuffix(QuantificationMeasure m) {
return switch (m) {
case APEX -> " Peak height";
case INTEGRAL, INTEGRAL_FWHMD -> " Peak area";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,19 +55,21 @@ protected static class CanopusSummaryRow {

private final PrecursorIonType[] ionTypes;
private final String id;
private final String featureId;
private final int best;

private ClassyFireFingerprintVersion CLF;
NPCFingerprintVersion NPCF;

public CanopusSummaryRow(ProbabilityFingerprint[] cfClassifications, ProbabilityFingerprint[] npcClassifications, MolecularFormula[] molecularFormulas, MolecularFormula[] precursorFormulas, PrecursorIonType[] ionTypes, String id) {
public CanopusSummaryRow(ProbabilityFingerprint[] cfClassifications, ProbabilityFingerprint[] npcClassifications, MolecularFormula[] molecularFormulas, MolecularFormula[] precursorFormulas, PrecursorIonType[] ionTypes, String id, String featureId) {
this.cfClassifications = cfClassifications;
this.npcClassifications = npcClassifications;
this.molecularFormulas = molecularFormulas;
this.precursorFormulas = precursorFormulas;
this.mostSpecificClasses = new ClassyfireProperty[molecularFormulas.length];
this.ionTypes = ionTypes;
this.id = id;
this.featureId = featureId;
this.best = chooseBestAndAssignPrimaryClasses(cfClassifications);

bestNPCProps = new NPCFingerprintVersion.NPCProperty[molecularFormulas.length][3];
Expand Down Expand Up @@ -186,7 +188,8 @@ private void addToRows(List<CanopusSummaryRow> rows, List<? extends SScored<Form
formulas.toArray(MolecularFormula[]::new),
preForms.toArray(MolecularFormula[]::new),
ionTypes.toArray(PrecursorIonType[]::new),
id.getParentId().getDirectoryName()
id.getParentId().getDirectoryName(),
id.getParentId().getFeatureId().orElse("N/A")
));
} finally {
lock.writeLock().unlock();
Expand Down Expand Up @@ -218,14 +221,14 @@ public void writeProjectSpaceSummary(ProjectWriter writer) throws IOException {
"ClassyFire#most specific class", "ClassyFire#most specific class Probability", "ClassyFire#level 5",
"ClassyFire#level 5 Probability", "ClassyFire#subclass", "ClassyFire#subclass Probability",
"ClassyFire#class", "ClassyFire#class Probability", "ClassyFire#superclass", "ClassyFire#superclass probability",
/*"NPC#all classifications",*/ "ClassyFire#all classifications"},
/*"NPC#all classifications",*/ "ClassyFire#all classifications", "featureId"},
HEADER2 = new String[]{"id", "molecularFormula", "adduct", "precursorFormula",
"NPC#pathway", "NPC#pathway Probability", "NPC#superclass", "NPC#superclass Probability",
"NPC#class", "NPC#class Probability",
"ClassyFire#most specific class", "ClassyFire#most specific class Probability", "ClassyFire#level 5",
"ClassyFire#level 5 Probability", "ClassyFire#subclass", "ClassyFire#subclass Probability",
"ClassyFire#class", "ClassyFire#class Probability", "ClassyFire#superclass", "ClassyFire#superclass probability",
/*"NPC#all classifications",*/ "ClassyFire#all classifications"};
/*"NPC#all classifications",*/ "ClassyFire#all classifications", "featureId"};

public static class IterateOverFormulas implements Iterator<String[]> {
int k = 0;
Expand Down Expand Up @@ -299,6 +302,8 @@ public String[] next() {

cols[i++] = Joiner.on("; ").join(row.cfClassifications[row.best].asDeterministic().asArray().presentFingerprints().asMolecularPropertyIterator());
// cols[i++] = Joiner.on("; ").join(row.npcClassifications[row.best].asDeterministic().asArray().presentFingerprints().asMolecularPropertyIterator());

cols[i++] = row.featureId;
++k;
return cols;

Expand Down Expand Up @@ -384,6 +389,9 @@ public String[] next() {
cols[i++] = Joiner.on("; ").join(row.cfClassifications[j].asDeterministic().asArray().presentFingerprints().asMolecularPropertyIterator());
// cols[i++] = Joiner.on("; ").join(row.npcClassifications[j].asDeterministic().asArray().presentFingerprints().asMolecularPropertyIterator());

cols[i++] = row.featureId;


++j;
if (j >= rows.get(k).cfClassifications.length) {
j = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ private void writeCSV(Writer w, LinkedHashMap<Class<? extends FormulaScore>, Str

String header = makeHeader(scoreOrder.stream().map(types::get).collect(Collectors.joining("\t")));
if (suffix)
header = header + "\tionMass" + "\tretentionTimeInSeconds" + "\tid";
header = header + "\tionMass" + "\tretentionTimeInSeconds" + "\tid" + "\tfeatureId";

w.write("rank\tformulaRank\t" + header + "\n");

Expand Down Expand Up @@ -245,6 +245,8 @@ private void writeCSV(Writer w, LinkedHashMap<Class<? extends FormulaScore>, Str
w.write(r.retentionTimeSeconds);
w.write('\t');
w.write(r.dirName);
w.write('\t');
w.write(r.featureId);
}

w.write('\n');
Expand All @@ -269,6 +271,8 @@ private static class ResultEntry {

public String lipidClass = "";

public String featureId;

public ResultEntry(FormulaResult r, CompoundContainer exp, int formulaRank) {
this.formulaRank = formulaRank;
scoring = r.getAnnotationOrThrow(FormulaScoring.class);
Expand All @@ -289,6 +293,7 @@ public ResultEntry(FormulaResult r, CompoundContainer exp, int formulaRank) {
ionMass = BigDecimal.valueOf(exp.getId().getIonMass().orElse(Double.NaN)).setScale(5, RoundingMode.HALF_UP).toString();
retentionTimeSeconds = String.valueOf(exp.getId().getRt().orElse(RetentionTime.NA()).getRetentionTimeInSeconds());
dirName = exp.getId().getDirectoryName();
featureId = exp.getId().getFeatureId().orElse("N/A");
}

public FormulaScoring getScoring() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -238,11 +238,11 @@ static Hit toHit(CompoundContainerId id, SScored<FormulaResult, ? extends Formul
final ZodiacScore zodiacScore = result.getCandidate().getAnnotation(FormulaScoring.class).
map(s -> s.getAnnotationOr(ZodiacScore.class, FormulaScore::NA)).orElse(FormulaScore.NA(ZodiacScore.class));

return new Hit(confidence + "\t" + line.get(0) + "\t" + zodiacScore + "\t" + siriusScore + "\t" + String.join("\t", line.subList(1, line.size())) + "\t" + id.getIonMass().orElse(Double.NaN) + "\t" + id.getRt().orElse(RetentionTime.NA()).getRetentionTimeInSeconds() + "\t" + id.getDirectoryName() + "\n", confidence, csiScore, formulaRank);
return new Hit(confidence + "\t" + line.get(0) + "\t" + zodiacScore + "\t" + siriusScore + "\t" + String.join("\t", line.subList(1, line.size())) + "\t" + id.getIonMass().orElse(Double.NaN) + "\t" + id.getRt().orElse(RetentionTime.NA()).getRetentionTimeInSeconds() + "\t" + id.getDirectoryName(), confidence, csiScore, formulaRank, id.getFeatureId().orElse("N/A"));
}

static void write(BufferedWriter w, List<Hit> data) throws IOException {
w.write("rank\t" + "formulaRank\t" + "#adducts\t" + "#predictedFPs\t" + new ConfidenceScore(0).name() + "\t" + StructureCSVExporter.HEADER_LIST.get(0) + "\t" + new ZodiacScore(0).name() + "\t" + new SiriusScore(0).name() + "\t" + String.join("\t", StructureCSVExporter.HEADER_LIST.subList(1, StructureCSVExporter.HEADER_LIST.size())) + "\t" + "ionMass\t" + "retentionTimeInSeconds\t" + "id" + "\n");
w.write("rank\t" + "formulaRank\t" + "#adducts\t" + "#predictedFPs\t" + new ConfidenceScore(0).name() + "\t" + StructureCSVExporter.HEADER_LIST.get(0) + "\t" + new ZodiacScore(0).name() + "\t" + new SiriusScore(0).name() + "\t" + String.join("\t", StructureCSVExporter.HEADER_LIST.subList(1, StructureCSVExporter.HEADER_LIST.size())) + "\t" + "ionMass\t" + "retentionTimeInSeconds\t" + "id\t" + "featureId" + "\n");
int rank = 0;
for (Hit s : data) {
w.write(String.valueOf(++rank));
Expand All @@ -254,6 +254,9 @@ static void write(BufferedWriter w, List<Hit> data) throws IOException {
w.write(String.valueOf(s.numberOfFps));
w.write("\t");
w.write(s.line);
w.write("\t");
w.write(s.featureId);
w.write("\n");
}
}

Expand All @@ -264,12 +267,14 @@ static class Hit {
final int formulaRank;
int numberOfAdducts = 1;
int numberOfFps = 1;
final String featureId;

Hit(String line, ConfidenceScore confidenceScore, double csiScore, int formulaRank) {
/**
 * Creates a hit from an already formatted result line and its sort keys.
 *
 * @param line            pre-formatted, tab separated result columns
 * @param confidenceScore confidence score stored with this hit
 * @param csiScore        CSI score stored with this hit
 * @param formulaRank     rank of the underlying formula candidate
 * @param featureId       feature id stored with this hit
 */
Hit(String line, ConfidenceScore confidenceScore, double csiScore, int formulaRank, String featureId) {
    // textual payload
    this.line = line;
    this.featureId = featureId;
    // ranking information
    this.formulaRank = formulaRank;
    this.csiScore = csiScore;
    this.confidenceScore = confidenceScore;
}

static Comparator<Hit> compareByConfidence() {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#here you can provide properties that may be needed during build- AND during runtime and should not be editable by the user at runtime
de.unijena.bioinf.siriusFrontend.version=5.7.4-SNAPSHOT
de.unijena.bioinf.sirius.version=4.12.14
de.unijena.bioinf.fingerid.version=2.6.14
de.unijena.bioinf.siriusFrontend.version=5.8.0
de.unijena.bioinf.sirius.version=4.13.0
de.unijena.bioinf.fingerid.version=2.6.15
#
de.unijena.bioinf.sirius.http.job.fingerprint.limit = 5000
de.unijena.bioinf.sirius.http.job.canopus.limit = 5000
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ public MgfExporterConfigPanel() {
getOptionDescriptionByName("merge-ms2").ifPresent(it -> mergeMs2.setToolTipText(GuiUtils.formatToolTip(it)));
paras.add(mergeMs2);

JCheckBox featureId = new JCheckBox("Feature ID?", true);
parameterBindings.put("feature-id", () -> "~" + featureId.isSelected());
getOptionDescriptionByName("feature-id").ifPresent(it -> featureId.setToolTipText(GuiUtils.formatToolTip(it)));
paras.add(featureId);

//merge-ppm
final String buf = "merge-ppm";
JSpinner mergePpm = makeGenericOptionSpinner(buf,
Expand Down

0 comments on commit 156a560

Please sign in to comment.