Skip to content

Commit

Permalink
Update glycan composition interface, support indexing links during fr…
Browse files Browse the repository at this point in the history
…agment name
  • Loading branch information
mobiusklein committed Aug 16, 2023
1 parent c02ee95 commit 17a42d8
Show file tree
Hide file tree
Showing 7 changed files with 153 additions and 58 deletions.
6 changes: 5 additions & 1 deletion src/glypy/_c/structure/glycan_composition.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,8 @@ cdef class _CompositionBase(dict):
cpdef str serialize(self)

cdef void _add_from(self, _CompositionBase other)
cdef void _subtract_from(self, _CompositionBase other)
cdef void _subtract_from(self, _CompositionBase other)

cpdef set_composition_offset(self, CComposition composition)
cpdef CComposition get_composition_offset(self)
cpdef _invalidate(self)
16 changes: 12 additions & 4 deletions src/glypy/_c/structure/glycan_composition.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@ from glypy.composition import formula

from glypy._c.utils cimport _prepare_glycan_composition_string

cdef CComposition water = CComposition("H2O")


cdef CComposition WATER = CComposition("H2O")
cdef object ZERO = 0


Expand All @@ -21,7 +19,7 @@ cdef class _CompositionBase(dict):
cdef _CompositionBase inst = cls.__new__(cls)
inst._mass = None
inst._reducing_end = None
inst._composition_offset = CComposition._create(water)
inst._composition_offset = CComposition._create(WATER)
return inst

cpdef object _getitem_fast(self, object key):
Expand Down Expand Up @@ -114,6 +112,16 @@ cdef class _CompositionBase(dict):
result = "%s$%s" % (result, formula(reduced.total_composition()))
return result

cpdef set_composition_offset(self, CComposition composition):
    """Store a new composition offset on this object.

    Parameters
    ----------
    composition : CComposition
        The value assigned to ``_composition_offset``.
    """
    # Reset ``_mass`` to None before swapping the offset in.
    self._invalidate()
    self._composition_offset = composition

cpdef CComposition get_composition_offset(self):
    """Return the object currently stored in ``_composition_offset``.

    The stored object itself is returned, not a copy.
    """
    return self._composition_offset

cpdef _invalidate(self):
    """Reset ``_mass`` to ``None``."""
    # NOTE(review): presumably ``_mass`` is a cached value recomputed on
    # demand elsewhere -- confirm against the mass accessor.
    self._mass = None


cdef str _reformat(dict self):
cdef:
Expand Down
67 changes: 35 additions & 32 deletions src/glypy/composition/ccomposition.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,17 @@ from .mass_dict import nist_mass
from .base import ChemicalCompositionError, composition_factory

cimport cython
from cpython cimport PY_MAJOR_VERSION

from cpython.ref cimport PyObject
from cpython.dict cimport (PyDict_GetItem, PyDict_SetItem, PyDict_Next,
PyDict_Keys, PyDict_Update, PyDict_DelItem, PyDict_Size)
from cpython.int cimport PyInt_AsLong, PyInt_Check, PyInt_FromLong


# if PY_MAJOR_VERSION < 3:
# from cpython.string cimport PyString_Format

# cdef extern from *:
# unicode PyUnicode_Format(object format, object args)

from glypy.composition.compat cimport PyStr_Format

from cpython.float cimport PyFloat_AsDouble
from cpython.tuple cimport PyTuple_GetItem
from cpython.list cimport PyList_GET_ITEM
from cpython.list cimport PyList_GET_ITEM, PyList_GET_SIZE

# Forward Declaration
cdef:
Expand Down Expand Up @@ -82,6 +74,36 @@ cdef str _make_isotope_string(str element_name, int isotope_num):
return <str>PyStr_Format('%s[%d]', parts)


cpdef CComposition composition_sum(list compositions):
    """Add up the element counts of every composition in a list.

    Parameters
    ----------
    compositions : list
        The compositions to combine. Items are cast to ``CComposition``
        with a plain ``<CComposition>`` cast, so they are assumed to
        already be instances of that type.

    Returns
    -------
    CComposition
        An empty ``CComposition`` for an empty list, ``copy()`` of the only
        item for a one-item list, otherwise ``copy()`` of the first item
        with the counts of all remaining items added to it.
    """
    cdef:
        size_t i, n
        CComposition accumulator, current
        str elem
        long cnt
        PyObject *pkey
        PyObject *pvalue
        Py_ssize_t ppos = 0

    n = PyList_GET_SIZE(compositions)
    if n == 1:
        # Single item: nothing to add, just hand back a copy.
        accumulator = <CComposition>PyList_GET_ITEM(compositions, 0)
        return accumulator.copy()
    elif n == 0:
        accumulator = CComposition()
        return accumulator
    else:
        # Seed the running total from a copy of the first item so that the
        # input objects are never written to by this function.
        accumulator = (<CComposition>PyList_GET_ITEM(compositions, 0)).copy()
        for i in range(1, n):
            current = <CComposition>PyList_GET_ITEM(compositions, i)
            # PyDict_Next keeps its position in ``ppos``; restart at 0 for
            # each new dict being walked.
            ppos = 0

            while(PyDict_Next(current, &ppos, &pkey, &pvalue)):
                elem = <str>pkey
                # NOTE(review): presumably ``getitem`` yields 0 for an
                # element not yet present -- confirm in CComposition.
                cnt = accumulator.getitem(elem)
                accumulator.setitem(elem, cnt + PyInt_AsLong(<object>pvalue))
    return accumulator


@cython.c_api_binop_methods(True)
cdef class CComposition(dict):
"""A Composition object stores a chemical composition of a
Expand All @@ -105,7 +127,10 @@ cdef class CComposition(dict):
mass_data : dict, optional
A dict with the masses of chemical elements (the default
value is :py:data:`nist_mass`). It is used for formulae parsing only.
"""
"""

sum = staticmethod(composition_sum)

def __str__(self): # pragma: no cover
return 'Composition({})'.format(dict.__repr__(self))

Expand Down Expand Up @@ -522,28 +547,6 @@ cdef class CComposition(dict):
Composition = CComposition


cpdef CComposition composition_sum(list compositions):
    """Add up the element counts of every composition in a list.

    Parameters
    ----------
    compositions : list
        The compositions to combine; each item is assigned to a
        ``CComposition``-typed local before being walked.

    Returns
    -------
    CComposition
        A freshly constructed ``CComposition`` holding the accumulated
        counts (left as constructed when the list is empty).
    """
    cdef:
        size_t i
        CComposition accumulator, current
        str elem
        long cnt
        PyObject *pkey
        PyObject *pvalue
        Py_ssize_t ppos = 0

    # Start from an empty total and fold every input into it.
    accumulator = CComposition()
    for i in range(len(compositions)):
        current = compositions[i]
        # PyDict_Next keeps its position in ``ppos``; restart at 0 for each
        # new dict being walked.
        ppos = 0

        while(PyDict_Next(current, &ppos, &pkey, &pvalue)):
            elem = <str>pkey
            # NOTE(review): presumably ``getitem`` yields 0 for an element
            # not yet present -- confirm in CComposition.
            cnt = accumulator.getitem(elem)
            accumulator.setitem(elem, cnt + PyInt_AsLong(<object>pvalue))
    return accumulator


@cython.wraparound(False)
@cython.boundscheck(False)
cpdef double calculate_mass(CComposition composition=None, str formula=None, int average=False, charge=None, mass_data=None) except -1:
Expand Down
5 changes: 5 additions & 0 deletions src/glypy/composition/composition.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ def _parse_isotope_string(label):

class PComposition(defaultdict):
'''Represent arbitrary elemental compositions'''

@staticmethod
def sum(compositions: list) -> 'PComposition':
    """Add every composition in *compositions* together with ``+``.

    The running total starts from an empty :class:`PComposition`, so an
    empty list yields an empty composition.
    """
    total = PComposition()
    for item in compositions:
        # Same left-to-right ``total + item`` fold the builtin ``sum``
        # performs with an explicit start value.
        total = total + item
    return total

def __str__(self): # pragma: no cover
return 'Composition({})'.format(dict.__repr__(self))

Expand Down
67 changes: 63 additions & 4 deletions src/glypy/structure/fragment.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import itertools
import re
from dataclasses import dataclass, field
from typing import DefaultDict, Dict, List, Set, Optional, Deque, TYPE_CHECKING
from typing import DefaultDict, Dict, List, Set, Optional, Deque, TYPE_CHECKING, Tuple, Union

from glypy.composition import Composition

Expand Down Expand Up @@ -255,6 +255,59 @@ def residues_contained(self):
"""
return len(self.included_nodes)

@classmethod
def to_glycan_compositions(cls, glycan: 'Glycan', fragments: List['GlycanFragment'],
                           by_series: bool = True) -> Union[
                               DefaultDict['HashableGlycanComposition', List['GlycanFragment']],
                               DefaultDict[str, Dict['HashableGlycanComposition',
                                                     List['GlycanFragment']]]
]:
    """
    Group fragments by the glycan composition of the nodes they include.

    Each fragment is converted to a
    :class:`~glypy.structure.glycan_composition.HashableGlycanComposition`
    by counting the residues of its :attr:`included_nodes`, and fragments
    that produce equal compositions are collected together.

    Parameters
    ----------
    glycan : :class:`~.glypy.structure.glycan.Glycan`
        The glycan the fragments came from. Its nodes are looked up by
        ``id`` to build the residues being counted.
    fragments : :class:`list` of :class:`GlycanFragment`
        The fragments to group.
    by_series : bool, optional
        When false, return the composition -> fragments mapping directly.
        When true (the default), additionally split each group by
        fragment :attr:`kind` and subtract the summed ``_fragment_shift``
        entries for that kind from the composition offset of a clone of
        the composition.

    Returns
    -------
    DefaultDict
        Either ``{composition: [fragments]}`` (``by_series`` false) or
        ``{kind: {shifted composition: [fragments]}}`` (``by_series`` true).
    """
    from glypy.structure.glycan_composition import (
        FrozenMonosaccharideResidue, HashableGlycanComposition)

    # One frozen residue per node, addressable by the node's id.
    residue_by_node_id = {}
    for node in glycan:
        residue_by_node_id[node.id] = FrozenMonosaccharideResidue.from_monosaccharide(
            node, False, False, False, False)

    by_composition: DefaultDict[HashableGlycanComposition, List['GlycanFragment']] = DefaultDict(list)
    for fragment in fragments:
        counted = HashableGlycanComposition()
        for node_id in fragment.included_nodes:
            counted[residue_by_node_id[node_id]] += 1
        by_composition[counted].append(fragment)

    if not by_series:
        return by_composition

    def kind_of(fragment):
        return fragment.kind

    # The shift only depends on the kind, so compute it once per kind.
    shift_by_kind = {}
    by_kind: DefaultDict[str, Dict[HashableGlycanComposition, List[GlycanFragment]]] = DefaultDict(dict)
    for base_composition, members in by_composition.items():
        # groupby only merges adjacent items, hence the sort on the same key.
        members.sort(key=kind_of)
        for kind, run in itertools.groupby(members, kind_of):
            if kind in shift_by_kind:
                shift = shift_by_kind[kind]
            else:
                shift = Composition.sum([_fragment_shift[k] for k in kind])
                shift_by_kind[kind] = shift
            shifted = base_composition.clone()
            shifted.composition_offset = shifted.composition_offset - shift
            by_kind[kind][shifted] = list(run)
    return by_kind


Fragment = GlycanFragment

Expand Down Expand Up @@ -478,8 +531,9 @@ def generate_y_fragments(cls, glycan: 'Glycan', include_composition=False, trave
) for links in map(flatten, cls.build_from(glycan.root)))
)
)
link_index = {link.id: link for _, link in glycan.iterlinks()}
for frag in fragments:
frag.name = glycan.name_fragment(frag)
frag.name = glycan.name_fragment(frag, link_index=link_index)
return fragments


Expand Down Expand Up @@ -528,8 +582,11 @@ def y_fragments_from_links(links_to_break: List['Link'], **kwargs):


def y_fragments_to_glycan_compositions(glycan: 'Glycan',
fragments: List[GlycanFragment]) -> DefaultDict['HashableGlycanComposition',
List[GlycanFragment]]:
fragments: List[GlycanFragment],
composition_offset: Optional[Composition] = None) -> DefaultDict[
'HashableGlycanComposition',
List[GlycanFragment]
]:
from glypy.structure.glycan_composition import (
FrozenMonosaccharideResidue, HashableGlycanComposition)
index_to_residue = {
Expand All @@ -545,6 +602,8 @@ def y_fragments_to_glycan_compositions(glycan: 'Glycan',
gc = HashableGlycanComposition()
for node_id in frag.included_nodes:
gc[index_to_residue[node_id]] += 1
if composition_offset is not None:
gc.set_composition_offset(composition_offset)
compositions[gc].append(frag)

return compositions
Expand Down
27 changes: 22 additions & 5 deletions src/glypy/structure/glycan.py
Original file line number Diff line number Diff line change
Expand Up @@ -1236,8 +1236,8 @@ def substructures(self, max_cleavages=1, min_cleavages=1, inplace=False):
max_cleavages):
yield frag

def name_fragment(self, fragment):
'''
def name_fragment(self, fragment, link_index=None):
"""
Attempt to assign a full name to a fragment based on the branch and position relative to
the reducing end along side A/B/C/X/Y/Z, according to :title-reference:`Domon and Costello`
Expand All @@ -1260,7 +1260,18 @@ def name_fragment(self, fragment):
"k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
"u" | "v" | "w" | "x" | "y" | "z"
'''
Parameters
----------
fragment : :class:`~glypy.structure.fragment.GlycanFragment`
The fragment to name
link_index : dict, optional
A mapping from link id to |Link| used to resolve the fragment's broken links
instead of :attr:`link_index`, for cases where that attribute isn't reliable.
Returns
-------
str
"""

break_targets = fragment.link_ids
crossring_targets = fragment.crossring_cleavages
Expand Down Expand Up @@ -1291,7 +1302,10 @@ def name_fragment(self, fragment):
for break_id, ion_type in break_targets.items():
ion_type = ion_type[1]
if _fragment_direction[ion_type] > 0:
link = self.link_index[break_id - 1]
if link_index:
link = link_index[break_id]
else:
link = self.link_index[break_id - 1]
label = link.label
name = "{}{}".format(
ion_type,
Expand All @@ -1300,7 +1314,10 @@ def name_fragment(self, fragment):
""))
name_parts.append(name)
else:
link = self.link_index[break_id - 1]
if link_index:
link = link_index[break_id]
else:
link = self.link_index[break_id - 1]
label = link.label
label_key = label[0]
distance = int(label[1:])
Expand Down
23 changes: 11 additions & 12 deletions src/glypy/structure/glycan_composition.py
Original file line number Diff line number Diff line change
Expand Up @@ -962,6 +962,16 @@ def serialize(self):
form = "%s$%s" % (form, formula(reduced.total_composition()))
return form

def get_composition_offset(self) -> Composition:
    """Return the object currently stored in ``_composition_offset``."""
    return self._composition_offset

def set_composition_offset(self, composition: Composition):
    """Store *composition* in ``_composition_offset`` and reset ``_mass``."""
    self._composition_offset = composition
    # ``_invalidate`` sets ``_mass`` back to None after the offset changes.
    self._invalidate()

def _invalidate(self):
    """Reset ``_mass`` to ``None``."""
    # NOTE(review): presumably ``_mass`` is a cached value recomputed on
    # demand elsewhere -- confirm against the mass accessor.
    self._mass = None

try:
from glypy._c.structure.glycan_composition import _CompositionBase
except ImportError:
Expand Down Expand Up @@ -1392,18 +1402,7 @@ def set_reducing_end(self, value):
self._invalidate()
self._reducing_end = value

def _invalidate(self):
    """Reset ``_mass`` to ``None``."""
    self._mass = None

@property
def composition_offset(self):
    """The object stored in ``_composition_offset``.

    Assigning to this property resets ``_mass`` via :meth:`_invalidate`
    before the new value is stored.
    """
    return self._composition_offset

@composition_offset.setter
def composition_offset(self, value):
    self._invalidate()
    self._composition_offset = value

composition_offset = property(_CompositionBase.get_composition_offset, _CompositionBase.set_composition_offset)

def clone(self, propogate_composition_offset=True, copy_nodes=True):
dup = self._empty()
Expand Down

0 comments on commit 17a42d8

Please sign in to comment.