Skip to content

Commit

Permalink
Upgrade NuGet packages, and change to new functionality in PSI_Interface SimpleMZIdentMLReader to use reduced memory when converting
Browse files Browse the repository at this point in the history
  • Loading branch information
FarmGeek4Life committed Feb 1, 2018
1 parent da9da87 commit cdd050a
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 39 deletions.
80 changes: 44 additions & 36 deletions MzidToTsvConverter/MzidToTsvConverter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -49,62 +49,61 @@ public void ConvertToTsv(ConverterOptions options)
public void ConvertToTsv(string mzidPath, string tsvPath, bool showDecoy = true, bool unrollResults = true, bool singleResult = false)
{
var reader = new SimpleMZIdentMLReader();
var data = reader.Read(mzidPath);

var headers = new List<string> {
"#SpecFile", "SpecID", "ScanNum", "FragMethod",
"Precursor", "IsotopeError", "PrecursorError(ppm)", "Charge",
"Peptide", "Protein", "DeNovoScore", "MSGFScore",
"SpecEValue", "EValue", "QValue", "PepQValue" };

// SPECIAL CASE:
// Certain versions of MS-GF+ output incorrect mzid files - the peptides referenced in the peptide_ref attribute in
// SpectrumIdentificationItems was correct, but if there was a modification in the first 3 residues there was at
// least a 50% chance of the PeptideEvidenceRefs within the SpectrumIdentificationItem being incorrect. So, for
// those bad versions, use the peptide_ref rather than the PeptideEvidenceRefs to get the sequence.
var isBadMsGfMzid = false;
if (data.AnalysisSoftwareCvAccession.ToUpper().Contains("MS:1002048") && !string.IsNullOrWhiteSpace(data.AnalysisSoftwareVersion))
using (var data = reader.ReadLowMem(mzidPath))
using (var stream = new StreamWriter(new FileStream(tsvPath, FileMode.Create, FileAccess.Write, FileShare.ReadWrite)))
{
// bad versions: v10280 (introduced), v10282, v2016.01.20, v2016.01.21, v2016.01.29, v2016.02.12, v2016.05.25, v2016.0.13, v2016.06.13, v2016.06.14, v2016.06.15, v2016.06.29, v2016.07.26, v2016.08.31, v2016.09.07, v2016.09.22, v2016.09.23 (fixed with version v2016.10.10)
var badVersions = new[]
var headers = new List<string>
{
"v10280", "v10282", "v2016.01.20", "v2016.01.21", "v2016.01.29", "v2016.02.12", "v2016.05.25", "v2016.0.13", "v2016.06.13", "v2016.06.14",
"v2016.06.15", "v2016.06.29", "v2016.07.26", "v2016.08.31", "v2016.09.07", "v2016.09.22", "v2016.09.23"
"#SpecFile", "SpecID", "ScanNum", "FragMethod", "Precursor", "IsotopeError", "PrecursorError(ppm)", "Charge", "Peptide",
"Protein", "DeNovoScore", "MSGFScore", "SpecEValue", "EValue", "QValue", "PepQValue"
};
foreach (var version in badVersions)

// SPECIAL CASE:
// Certain versions of MS-GF+ output incorrect mzid files - the peptides referenced in the peptide_ref attribute in
// SpectrumIdentificationItems was correct, but if there was a modification in the first 3 residues there was at
// least a 50% chance of the PeptideEvidenceRefs within the SpectrumIdentificationItem being incorrect. So, for
// those bad versions, use the peptide_ref rather than the PeptideEvidenceRefs to get the sequence.
var isBadMsGfMzid = false;
if (data.AnalysisSoftwareCvAccession.ToUpper().Contains("MS:1002048") && !string.IsNullOrWhiteSpace(data.AnalysisSoftwareVersion))
{
if (data.AnalysisSoftwareVersion.Contains(version))
// bad versions: v10280 (introduced), v10282, v2016.01.20, v2016.01.21, v2016.01.29, v2016.02.12, v2016.05.25, v2016.0.13, v2016.06.13, v2016.06.14, v2016.06.15, v2016.06.29, v2016.07.26, v2016.08.31, v2016.09.07, v2016.09.22, v2016.09.23 (fixed with version v2016.10.10)
var badVersions = new[]
{
isBadMsGfMzid = true;
"v10280", "v10282", "v2016.01.20", "v2016.01.21", "v2016.01.29", "v2016.02.12", "v2016.05.25", "v2016.0.13", "v2016.06.13",
"v2016.06.14", "v2016.06.15", "v2016.06.29", "v2016.07.26", "v2016.08.31", "v2016.09.07", "v2016.09.22", "v2016.09.23"
};
foreach (var version in badVersions)
{
if (data.AnalysisSoftwareVersion.Contains(version))
{
isBadMsGfMzid = true;
}
}
}
}
if (isBadMsGfMzid)
{
ShowWarning(string.Format("Warning: file \"{0}\" was created with a version of MS-GF+ that had some erroneous output in the mzid file." +
" Using sequences from the peptide_ref attribute instead of the PeptideEvidenceRef element to try to bypass the issue.", mzidPath));
}

using (var stream = new StreamWriter(new FileStream(tsvPath, FileMode.Create, FileAccess.Write, FileShare.ReadWrite)))
{
stream.WriteLine(string.Join("\t", headers));

if (data.Identifications.Count == 0)
if (isBadMsGfMzid)
{
ShowWarning("Warning: .mzID file does not have any results");
System.Threading.Thread.Sleep(1500);
return;
ShowWarning(string.Format(
"Warning: file \"{0}\" was created with a version of MS-GF+ that had some erroneous output in the mzid file." +
" Using sequences from the peptide_ref attribute instead of the PeptideEvidenceRef element to try to bypass the issue.",
mzidPath));
}

stream.WriteLine(string.Join("\t", headers));

var lastScanNum = 0;
var resultsWritten = 0;
var writtenCount = 0;

foreach (var id in data.Identifications)
{
if (singleResult && id.ScanNum == lastScanNum)
{
continue;
}

writtenCount++;

lastScanNum = id.ScanNum;
var specFile = data.SpectrumFile;
var specId = id.NativeId;
Expand All @@ -114,12 +113,14 @@ public void ConvertToTsv(string mzidPath, string tsvPath, bool showDecoy = true,
{
fragMethod = id.AllParamsDict["AssumedDissociationMethod"];
}

var precursor = id.ExperimentalMz;
var isotopeError = "0";
if (id.AllParamsDict.ContainsKey("IsotopeError"))
{
isotopeError = id.AllParamsDict["IsotopeError"];
}

var adjExpMz = id.ExperimentalMz - IsotopeMass * int.Parse(isotopeError) / id.Charge;
//var precursorError = (id.CalMz - id.ExperimentalMz) / id.CalMz * 1e6;
var precursorError = (adjExpMz - id.CalMz) / id.CalMz * 1e6;
Expand Down Expand Up @@ -189,6 +190,13 @@ public void ConvertToTsv(string mzidPath, string tsvPath, bool showDecoy = true,
}
}
}

if (writtenCount == 0)
{
ShowWarning("Warning: .mzID file does not have any results");
System.Threading.Thread.Sleep(1500);
return;
}
}
}

Expand Down
6 changes: 3 additions & 3 deletions MzidToTsvConverter/MzidToTsvConverter.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<RootNamespace>MzidToTsvConverter</RootNamespace>
<AssemblyName>MzidToTsvConverter</AssemblyName>
<PackageId>MzidToTsvConverter</PackageId>
<Version>1.1.0</Version>
<Version>1.2.0</Version>
<AssemblyVersion>$(Version)</AssemblyVersion>
<FileVersion>$(Version)</FileVersion>
<Description>Converts mzid[.gz] files to the MS-GF+ tsv format. Designed for MS-GF+ mzid files (looks for EValue and SpecEValue scores)</Description>
Expand All @@ -19,8 +19,8 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="PRISM-Library" Version="2.4.38" />
<PackageReference Include="PSI_Interface" Version="1.3.1" />
<PackageReference Include="PRISM-Library" Version="2.4.58" />
<PackageReference Include="PSI_Interface" Version="1.3.5" />
</ItemGroup>

</Project>

0 comments on commit cdd050a

Please sign in to comment.