Skip to content

Commit

Permalink
Merge pull request #340 from pymzml/dev
Browse files Browse the repository at this point in the history
Release
  • Loading branch information
fu authored Sep 15, 2023
2 parents 156ffa8 + 7d530a5 commit 33d5f18
Show file tree
Hide file tree
Showing 8 changed files with 95 additions and 87 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/tox_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ jobs:
black:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Run black
uses: actions/setup-python@v4
with:
Expand All @@ -23,7 +23,7 @@ jobs:
matrix:
python: ['3.8', '3.9', '3.10']
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Setup Python
uses: actions/setup-python@v4
with:
Expand Down
35 changes: 34 additions & 1 deletion .readthedocs.yml
Original file line number Diff line number Diff line change
@@ -1,2 +1,35 @@
# Read the Docs configuration file for Sphinx projects
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

# Required
version: 2

# Set the OS, Python version and other tools you might need
build:
os: ubuntu-22.04
tools:
python: "3.11"
# You can also specify other tool versions:
# nodejs: "20"
# rust: "1.70"
# golang: "1.20"

# Build documentation in the "docs/" directory with Sphinx
sphinx:
configuration: docs/source/conf.py
# You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs
# builder: "dirhtml"
# Fail on all warnings to avoid broken references
# fail_on_warning: true

# Optionally build your docs in additional formats such as PDF and ePub
# formats:
# - pdf
# - epub

# Optional but recommended, declare the Python requirements required
# to build your documentation
# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
python:
version: 3
install:
- requirements: docs/requirements.txt
2 changes: 1 addition & 1 deletion docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
sphinx==6.1.3
sphinx==7.2.5
sphinx_rtd_theme==0.5.0
regex
11 changes: 11 additions & 0 deletions pymzml/obo.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ class OboTranslator(object):
version (str): obo version
"""

_obo_instance_cache = {}

def __init__(self, version=None):
self.version = self.__normalize_version(version)
self.all_dicts = []
Expand All @@ -108,6 +110,15 @@ def __init__(self, version=None):
# Only parse the OBO when necessary, not upon object construction
self.__obo_parsed = False

@classmethod
def from_cache(cls, version):
    """
    Return the shared translator instance for the given obo version.

    The version is normalized first and the normalized value is used as
    the key into the class-level ``_obo_instance_cache``. On a cache miss
    a new instance is constructed from the normalized version, stored
    under that key and returned; later calls with a version that
    normalizes to the same key get the very same object back.

    Arguments:
        version (str): obo version

    Returns:
        the cached (or freshly created and cached) translator instance
    """
    cache_key = cls.__normalize_version(version)
    translator = cls._obo_instance_cache.get(cache_key)
    if translator is None:
        # First request for this version: build once and remember it.
        translator = cls(cache_key)
        cls._obo_instance_cache[cache_key] = translator
    return translator

def __setitem__(self, key, value):
    """
    Reject item assignment on the translator.

    Always raises TypeError; the error message directs callers to ``.add``.
    """
    raise TypeError("OBO translator dictionaries only support assignment via .add")

Expand Down
30 changes: 23 additions & 7 deletions pymzml/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,24 +155,22 @@ def __next__(self):
event, element = next(self.iter, ("END", "END"))
if event == "end":
if element.tag.endswith("}spectrum"):
spectrum = spec.Spectrum(element)
spectrum = spec.Spectrum(element, obo_version=self.OT.version)
if has_ref_group:
spectrum._set_params_from_reference_group(
self.info["referenceable_param_group_list_element"]
)
ms_level = spectrum.ms_level
spectrum.measured_precision = self.ms_precisions[ms_level]
spectrum.calling_instance = self
return spectrum
if element.tag.endswith("}chromatogram"):
if self.skip_chromatogram:
continue
spectrum = spec.Chromatogram(element)
spectrum = spec.Chromatogram(element, obo_version=self.OT.version)
# if has_ref_group:
# spectrum._set_params_from_reference_group(
# self.info['referenceable_param_group_list_element']
# )
spectrum.calling_instance = self
return spectrum
elif event == "END":
# reinit iter
Expand Down Expand Up @@ -201,7 +199,7 @@ def __getitem__(self, identifier):
except:
pass
spectrum = self.info["file_object"][identifier]
spectrum.calling_instance = self
spectrum.obo_translator = self.OT
if isinstance(spectrum, spec.Spectrum):
spectrum.measured_precision = self.ms_precisions[spectrum.ms_level]
return spectrum
Expand Down Expand Up @@ -273,7 +271,6 @@ def _determine_file_encoding(self, path):

@staticmethod
def _obo_version_validator(version):

"""
The obo version should fit file names in the obo folder.
However, some software generate mzML with built in obo version string like:
Expand Down Expand Up @@ -347,7 +344,7 @@ def _init_obo_translator(self):
# required) ...
if self.info.get("obo_version", None) is None:
self.info["obo_version"] = "1.1.0"
obo_translator = obo.OboTranslator(version=self.info["obo_version"])
obo_translator = obo.OboTranslator.from_cache(version=self.info["obo_version"])

return obo_translator

Expand Down Expand Up @@ -386,9 +383,27 @@ def _init_iter(self):
obo_in_mzml = element.attrib.get("version", "1.1.0")
self.info["obo_version"] = self._obo_version_validator(obo_in_mzml)

elif element.tag.endswith("}fileDescription"):
self.info["file_description"] = True
self.info["file_description_element"] = element
elif element.tag.endswith("}sampleList"):
self.info["sample_list"] = True
self.info["sample_list_element"] = element

elif element.tag.endswith("}referenceableParamGroupList"):
self.info["referenceable_param_group_list"] = True
self.info["referenceable_param_group_list_element"] = element

elif element.tag.endswith("}softwareList"):
self.info["software_list"] = True
self.info["software_list_element"] = element
elif element.tag.endswith("}instrumentConfigurationList"):
self.info["instrument_configuration_list"] = True
self.info["instrument_configuration_list_element"] = element
elif element.tag.endswith("}dataProcessingList"):
self.info["data_processing_list"] = True
self.info["data_processing_list_element"] = element

elif element.tag.endswith("}spectrumList"):
spec_cnt = element.attrib.get("count")
self.info["spectrum_count"] = int(spec_cnt) if spec_cnt else None
Expand All @@ -401,6 +416,7 @@ def _init_iter(self):
elif element.tag.endswith("}run"):
run_id = element.attrib.get("id")
start_time = element.attrib.get("startTimeStamp")
self.info["run_element"] = element
self.info["run_id"] = run_id
self.info["start_time"] = start_time
else:
Expand Down
91 changes: 20 additions & 71 deletions pymzml/spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
pass
from . import regex_patterns
from .decoder import MSDecoder
from .obo import OboTranslator

PROTON = 1.00727646677
ISOTOPE_AVERAGE_DIFFERENCE = 1.002
Expand All @@ -74,22 +75,6 @@ class MS_Spectrum(object):
General spectrum class for data handling.
"""

# __slots__ = [
# # '_read_accessions',
# # 'get_element_by_name',
# # 'get_element_by_path',
# # '_register',
# # 'precursors',
# # '_get_encoding_parameters',
# # 'measured_precision',
# # '_decode_to_numpy',
# # '_median',
# # 'to_string',
# ]
def __init__(self):
"""."""
pass

def _read_accessions(self):
"""Set all required variables for this spectrum."""
self.accessions = {}
Expand Down Expand Up @@ -175,8 +160,6 @@ def _get_encoding_parameters(self, array_type):
"""
numpress_encoding = False

# array_type_accession = self.calling_instance.OT[array_type]["id"]

b_data_string = "./{ns}binaryDataArrayList/{ns}binaryDataArray/{ns}cvParam[@name='{name}']/..".format(
ns=self.ns, name=array_type
)
Expand Down Expand Up @@ -204,7 +187,7 @@ def _get_encoding_parameters(self, array_type):
d_type = b_data_array.find(
float_type_string.format(
ns=self.ns,
Acc=self.calling_instance.OT["32-bit float"]["id"],
Acc=self.obo_translator["32-bit float"]["id"],
)
).get("name")
except:
Expand All @@ -213,7 +196,7 @@ def _get_encoding_parameters(self, array_type):
d_type = b_data_array.find(
float_type_string.format(
ns=self.ns,
Acc=self.calling_instance.OT["64-bit float"]["id"],
Acc=self.obo_translator["64-bit float"]["id"],
)
).get("name")
except:
Expand All @@ -222,9 +205,7 @@ def _get_encoding_parameters(self, array_type):
d_type = b_data_array.find(
float_type_string.format(
ns=self.ns,
Acc=self.calling_instance.OT["32-bit integer"][
"id"
],
Acc=self.obo_translator["32-bit integer"]["id"],
)
).get("name")
except:
Expand All @@ -233,17 +214,15 @@ def _get_encoding_parameters(self, array_type):
d_type = b_data_array.find(
float_type_string.format(
ns=self.ns,
Acc=self.calling_instance.OT["64-bit integer"][
"id"
],
Acc=self.obo_translator["64-bit integer"]["id"],
)
).get("name")
except:
# null-terminated ASCII string
d_type = b_data_array.find(
float_type_string.format(
ns=self.ns,
Acc=self.calling_instance.OT[
Acc=self.obo_translator[
"null-terminated ASCII string"
]["id"],
)
Expand Down Expand Up @@ -302,7 +281,6 @@ def _decode_to_numpy(self, data, d_array_length, data_type, comp):
or "MS-Numpress linear prediction compression" in comp
or "MS-Numpress short logged float compression" in comp
):

out_data = self._decodeNumpress_to_array(out_data, comp)
if data_type == "32-bit float":
# one character code may be sufficient too (f)
Expand Down Expand Up @@ -426,8 +404,13 @@ class Spectrum(MS_Spectrum):
"""

def __init__(self, element=ElementTree.Element(""), measured_precision=5e-6):

def __init__(
self,
element=ElementTree.Element(""),
measured_precision=5e-6,
*,
obo_version=None,
):
__slots__ = [
"_centroided_peaks",
"_centroided_peaks_sorted_by_i",
Expand Down Expand Up @@ -480,7 +463,7 @@ def __init__(self, element=ElementTree.Element(""), measured_precision=5e-6):
self._transformed_mass_with_error = None
self._transformed_mz_with_error = None
self._transformed_peaks = None
self.calling_instance = None
self.obo_translator = OboTranslator.from_cache(obo_version)
self.element = element
self.measured_precision = measured_precision
self.noise_level_estimate = {}
Expand Down Expand Up @@ -662,7 +645,7 @@ def __getitem__(self, accession):
else:
if not accession.startswith("MS:"):
try:
accession = self.calling_instance.OT[accession]["id"]
accession = self.obo_translator[accession]["id"]
except TypeError:
accession = "---"
search_string = './/*[@accession="{0}"]'.format(accession)
Expand Down Expand Up @@ -1003,6 +986,7 @@ def selected_precursors(self):
("i", i_values),
("charge", charges),
("precursor id", ids),
("element", precursors),
]:
try:
dict_2_save[key] = list_of_values[pos]
Expand Down Expand Up @@ -1246,11 +1230,9 @@ def _centroid_peaks(self):
"""

try:
profile_ot = self.calling_instance.OT.name.get("profile spectrum", None)
profile_ot = self.obo_translator.name.get("profile spectrum", None)
if profile_ot is None:
profile_ot = self.calling_instance.OT.name.get(
"profile mass spectrum", None
)
profile_ot = self.obo_translator.name.get("profile mass spectrum", None)
acc = profile_ot["id"]
is_profile = (
True
Expand Down Expand Up @@ -1332,17 +1314,6 @@ def _reprofile_Peaks(self):
self.set_peaks(None, "centroided")
return tmp

def _register(self, decoded_tuple):
d_type, array = decoded_tuple
if d_type == "mz":
self._mz = array
elif d_type == "i":
self._i = array
elif d_type == "time":
self._time = array
else:
raise Exception("Unknown data Type ({0})".format(d_type))

def _mz_2_mass(self, mz, charge):
"""
Calculate the uncharged mass for a given mz value
Expand Down Expand Up @@ -1766,7 +1737,7 @@ class Chromatogram(MS_Spectrum):
Class for Chromatogram access and handling.
"""

def __init__(self, element, measured_precision=5e-6, param=None):
def __init__(self, element, measured_precision=5e-6, *, obo_version=None):
"""
Arguments:
element (xml.etree.ElementTree.Element): spectrum as xml Element
Expand All @@ -1775,29 +1746,6 @@ def __init__(self, element, measured_precision=5e-6, param=None):
measured_precision (float): in ppm, i.e. 5e-6 equals to 5 ppm.
obo_version (str): keyword-only obo version passed to OboTranslator.from_cache (default None)
"""

__slots__ = [
"_measured_precision",
"element",
"noise_level_estimate",
"_time",
"_i",
"_t_mass_set",
"_peaks",
"_t_mz_set",
"_centroided_peaks",
"_reprofiled_peaks",
"_deconvoluted_peaks",
"_profile",
"_extreme_values",
"_centroided_peaks_sorted_by_i",
"_transformed_mz_with_error",
"_transformed_mass_with_error",
"_precursors",
"_ID",
"internal_precision",
]

self._measured_precision = measured_precision
self.element = element
self.noise_level_estimate = {}
Expand All @@ -1818,6 +1766,7 @@ def __init__(self, element, measured_precision=5e-6, param=None):
self._transformed_mass_with_error = None
self._precursors = None
self._ID = None
self.obo_translator = OboTranslator.from_cache(obo_version)

if self.element:
# self._read_accessions()
Expand Down
2 changes: 1 addition & 1 deletion pymzml/version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.5.2
2.5.3
Loading

0 comments on commit 33d5f18

Please sign in to comment.