Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion docs/src/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,20 @@


# -- General configuration ---------------------------------------------------
os.environ["SPHINX_BUILD"] = "1"

# If your documentation needs a minimal Sphinx version, state it here.
# needs_sphinx = "1.0"
# conf.py

# ... other configuration options ...

# Global setup for doctests: this code is executed before any doctest example.
doctest_global_setup = """
from spectrum_utils import spectrum, proforma
import matplotlib
"""


# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
Expand All @@ -44,13 +55,18 @@
"sphinx_markdown_tables", # Support tables in Markdown.
"sphinx.ext.autodoc", # Include documentation from docstrings.
# "sphinx.ext.autosummary", # Generate documentation summary one-liners.
# "sphinx.ext.doctest", # Test code in the documentation.
"sphinx.ext.doctest", # Test code in the documentation.
# "sphinx.ext.coverage", # Collect documentation coverage statistics.
"sphinx.ext.napoleon", # Support NumPy and Google style docstrings.
"sphinx.ext.viewcode", # Add links to the source code.
"sphinx_rtd_theme", # Read-the-docs theme.
]

myst_enable_extensions = [
"dollarmath",
"amsmath",
]

# Generate documentation from all docstrings.
autodoc_default_options = {
"member-order": "bysource", # Sort by order in the source.
Expand Down
47 changes: 24 additions & 23 deletions docs/src/quickstart.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,36 +13,37 @@ Here we briefly introduce spectrum_utils' spectrum processing and visualization
IO functionality to read spectra from MS data files is not directly included in spectrum_utils.
Instead you can use excellent libraries to read a variety of mass spectrometry data formats such as [Pyteomics](https://pyteomics.readthedocs.io/) or [pymzML](https://pymzml.readthedocs.io/).

```python
import matplotlib.pyplot as plt
import spectrum_utils.plot as sup
import spectrum_utils.spectrum as sus


```{doctest}
>>> import matplotlib.pyplot as plt
>>> import spectrum_utils.plot as sup
>>> import spectrum_utils.spectrum as sus


# Retrieve the spectrum by its USI.
usi = "mzspec:PXD004732:01650b_BC2-TUM_first_pool_53_01_01-3xHCD-1h-R2:scan:41840"
peptide = "WNQLQAFWGTGK"
spectrum = sus.MsmsSpectrum.from_usi(usi)
>>> usi = "mzspec:PXD000561:Adult_Frontalcortex_bRP_Elite_85_f09:scan:17555"
>>> peptide = "VLHPLEGAVVIIFK"
>>> spectrum = sus.MsmsSpectrum.from_usi(usi)

# Process the spectrum.
fragment_tol_mass, fragment_tol_mode = 10, "ppm"
spectrum = (
spectrum.set_mz_range(min_mz=100, max_mz=1400)
.remove_precursor_peak(fragment_tol_mass, fragment_tol_mode)
.filter_intensity(min_intensity=0.05, max_num_peaks=50)
.scale_intensity("root")
.annotate_proforma(
peptide, fragment_tol_mass, fragment_tol_mode, ion_types="aby"
)
)
>>> fragment_tol_mass, fragment_tol_mode = 10, "ppm"
>>> spectrum = (
... spectrum.set_mz_range(min_mz=100, max_mz=1400)
... .remove_precursor_peak(fragment_tol_mass, fragment_tol_mode)
... .filter_intensity(min_intensity=0.05, max_num_peaks=50)
... .scale_intensity("root")
... .annotate_proforma(
... peptide, fragment_tol_mass, fragment_tol_mode, ion_types="aby")
... )

# Plot the spectrum.
fig, ax = plt.subplots(figsize=(12, 6))
sup.spectrum(spectrum, grid=False, ax=ax)
ax.spines["right"].set_visible(False)
ax.spines["top"].set_visible(False)
plt.savefig("quickstart.png", bbox_inches="tight", dpi=300, transparent=True)
plt.close()
>>> fig, ax = plt.subplots(figsize=(12, 6))
>>> sup.spectrum(spectrum, grid=False, ax=ax)
>>> ax.spines["right"].set_visible(False)
>>> ax.spines["top"].set_visible(False)
>>> plt.savefig("quickstart.png", bbox_inches="tight", dpi=300, transparent=True)
>>> plt.close()
```

As demonstrated, each of the processing steps can be achieved using a single, high-level function call.
Expand Down
2 changes: 1 addition & 1 deletion spectrum_utils/fragment_annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,7 @@ def get_theoretical_fragments(
(
fragment_sequence,
"b",
f"{start_i+1}:{stop_i+1}",
f"{start_i + 1}:{stop_i + 1}",
mod_mass,
)
)
Expand Down
10 changes: 4 additions & 6 deletions spectrum_utils/proforma.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,12 +400,12 @@ def aa(self, tree) -> None:
self._sequence.append(tree[0])
# An amino acid token can be followed by (i) a modification on that
# residue, or (ii) a label (linking it to another modified residue).
if isinstance(tree[1], Label):
if len(tree) > 1 and isinstance(tree[1], Label):
# noinspection PyArgumentList
self._modifications.append(
Modification(position=position, label=tree[1])
)
elif isinstance(tree[1], Modification):
elif len(tree) > 1 and isinstance(tree[1], Modification):
tree[1].position = position
self._modifications.append(tree[1])

Expand Down Expand Up @@ -879,10 +879,8 @@ def _parse_obo(
elif (
cv_id == "XLMOD"
and isinstance(clause, fastobo.term.PropertyValueClause)
and (
clause.property_value.relation.prefix
== "monoIsotopicMass"
)
and "monoIsotopicMass"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: What is the advantage of the new version vs the previous code?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The main reason for this change is that the previous code raised an AttributeError. Depending on how fastobo parses the OBO file, the relation might include an extra colon or whitespace (e.g. "monoIsotopicMass:" or with surrounding spaces), as seen in the XLMOD.obo file. I therefore changed the previous implementation so that minor formatting differences don’t cause the condition to fail.

in str(clause.property_value.relation)
):
term_mass = float(clause.property_value.value)
elif cv_id == "GNO" and isinstance(
Expand Down
29 changes: 24 additions & 5 deletions spectrum_utils/spectrum.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import os
import copy
import functools
import urllib.parse
Expand All @@ -22,7 +23,13 @@ def __init__(self, **kwargs):


# Reload the Pyteomics PROXI aggregator to also include GNPS.
pyteomics.usi._proxies["gnps"] = GnpsBackend
# Only perform the assignment if not building docs.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: Why is this necessary?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @bittremieux, it's good to see you again. This ensures that the GNPS backend is used for normal runtime usage, while avoiding potential issues during Sphinx documentation builds or in other environments, so that running the doctests does not cause any issues.

if not os.environ.get("SPHINX_BUILD"):
try:
pyteomics.usi._proxies["gnps"] = GnpsBackend
except Exception:
pass

pyteomics.usi.AGGREGATOR = pyteomics.usi.PROXIAggregator()


Expand Down Expand Up @@ -75,7 +82,7 @@ def intensity(self) -> np.ndarray:
def round(
self, decimals: int = 0, combine: str = "sum"
) -> "MsmsSpectrumJit":
mz_round = np.round_(self._mz, decimals, np.empty_like(self._mz))
mz_round = np.round(self._mz, decimals, np.empty_like(self._mz))
mz_unique = np.unique(mz_round)
if len(mz_unique) == len(mz_round):
self._mz = mz_unique
Expand Down Expand Up @@ -646,13 +653,24 @@ def _annotate_proteoforms(
(since parsing the sequence is a lot slower than annotating the
peaks).

>>> import spectrum_utils.spectrum as sus
>>> identifier = "test_spec"
>>> precursor_mz = 500.0
>>> precursor_charge = 2
>>> mz_array = np.array([100.0, 200.0, 300.0])
>>> intensity_array = np.array([10.0, 20.0, 30.0])

>>> spec = sus.MsmsSpectrum(identifier, precursor_mz, precursor_charge, mz_array, intensity_array)
>>> proforma_sequence = "MYPEPTIDEK/2"
>>> spectrum.annotate_proforma(proforma_sequence, ...)
>>> _ = spec.annotate_proforma(proforma_str =proforma_sequence, fragment_tol_mass=10.0,
... fragment_tol_mode ="ppm", ion_types="by")

or
--- or
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

todo: This interprets it as a Python or statement. Better to not have this be interpreted by doctest.

Copy link
Author

@dikshant182004 dikshant182004 Mar 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I will update this as well. However, it is not causing any issues, and it helps maintain the indentation with the other code.


>>> parsed_proforma = proforma.parse(proforma_sequence)
>>> spectrum._annotate_proteoforms(parsed_proforma, proforma_sequence, ...)
>>> _ = spec._annotate_proteoforms(proteoforms=parsed_proforma, proforma_str =proforma_sequence,
... fragment_tol_mass=10.0, fragment_tol_mode ="ppm", ion_types="by")


WARN:
This function does not check that the passed sequence
Expand Down Expand Up @@ -704,6 +722,7 @@ def _annotate_proteoforms(
> fragment_tol_mass
):
fragment_i += 1

i = 0
while (
fragment_i + i < len(fragments)
Expand Down
12 changes: 8 additions & 4 deletions tests/fragment_annotation_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,8 +326,10 @@ def test_get_theoretical_fragments_neutral_loss():
assert fragment_mz == pytest.approx(
fragments[
f"""{annotation.ion_type}^{annotation.charge}{
annotation.neutral_loss if annotation.neutral_loss is not None
else ''}"""
annotation.neutral_loss
if annotation.neutral_loss is not None
else ""
}"""
]
)

Expand Down Expand Up @@ -390,8 +392,10 @@ def test_get_theoretical_fragments_mod_neutral_loss():
assert fragment_mz == pytest.approx(
fragments[
f"""{annotation.ion_type}^{annotation.charge}{
annotation.neutral_loss if annotation.neutral_loss is not None
else ''}"""
annotation.neutral_loss
if annotation.neutral_loss is not None
else ""
}"""
]
)

Expand Down
4 changes: 2 additions & 2 deletions tests/spectrum_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def test_mz_array():
mz = np.random.uniform(100, 1400, num_peaks).tolist()
intensity = np.random.lognormal(0, 1, num_peaks)
spec = spectrum.MsmsSpectrum("test_spectrum", 500, 2, mz, intensity)
assert type(spec.mz) == np.ndarray
assert isinstance(spec.mz, np.ndarray)
with pytest.raises(AttributeError):
spec.mz = np.random.uniform(100, 1400, num_peaks)

Expand All @@ -59,7 +59,7 @@ def test_intensity_array():
mz = np.random.uniform(100, 1400, num_peaks)
intensity = np.random.lognormal(0, 1, num_peaks).tolist()
spec = spectrum.MsmsSpectrum("test_spectrum", 500, 2, mz, intensity)
assert type(spec.intensity) == np.ndarray
assert isinstance(spec.intensity, np.ndarray)
with pytest.raises(AttributeError):
spec.intensity = np.random.lognormal(0, 1, num_peaks)

Expand Down