diff --git a/.github/workflows/generate_api.yaml b/.github/workflows/generate_api.yaml new file mode 100644 index 0000000..b321a1d --- /dev/null +++ b/.github/workflows/generate_api.yaml @@ -0,0 +1,38 @@ +name: Generate API + +on: push + +jobs: + generate-api: + runs-on: ubuntu-latest + env: + LIB_NAME: ${{ secrets.LIB_NAME }} + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: 3.9 + + - name: Install sdRDM + run: python3 -m pip install git+https://github.com/JR-1991/software-driven-rdm.git@20-lowest-level-elements-in-xml-cannot-have-attributes-and-content + + - name: Generate API + env: + URL: ${{github.repositoryUrl}} + COMMIT: ${{github.sha}} + run: sdrdm generate --path ./specifications/ --out . --name "$LIB_NAME" --url "$URL" --commit "$COMMIT" + + - name: Push source code + run: | + if [[ `git status --porcelain` ]]; then + git add "$LIB_NAME" + git config --global user.name 'sdRDM Bot' + git config --global user.email 'sdRDM@bot.com' + git commit -am "API update" + git push + else + echo "Nothing changed!" + fi diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..d99f2f3 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter" + }, + "python.formatting.provider": "none" +} \ No newline at end of file diff --git a/links/enzymeml.toml b/links/enzymeml.toml new file mode 100644 index 0000000..a312726 --- /dev/null +++ b/links/enzymeml.toml @@ -0,0 +1,22 @@ +__model__ = "NMRpy" + +[__sources__] +EnzymeMLDocument = "https://github.com/EnzymeML/enzymeml-specifications.git@markdown-parser-refactor" + +[NMRpy] +datetime_created = "EnzymeMLDocument.created" +datetime_modified = "EnzymeMLDocument.modified" + +[experiment] +name = "EnzymeMLDocument.name" + +[citation] +doi = "EnzymeMLDocument.doi" + +["citation.authors"] +last_name = "EnzymeMLDocument.creators.family_name" +first_name = "EnzymeMLDocument.creators.given_name" +email = "EnzymeMLDocument.creators.mail" + +["citation.related_publications"] +doi = "EnzymeMLDocument.url" diff --git a/links/nmrml.toml b/links/nmrml.toml new file mode 100644 index 0000000..e69de29 diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index f8d546e..8401371 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -1,3 +1,4 @@ +from pathlib import Path import numpy import scipy from matplotlib import pyplot @@ -9,22 +10,46 @@ from nmrpy.plotting import * import os import pickle +from datetime import datetime +from ipywidgets import SelectMultiple +from sdRDM import DataModel -class Base(): + +class Base: _complex_dtypes = [ - numpy.dtype('csingle'), - numpy.dtype('cdouble'), - numpy.dtype('clongdouble'), - ] + numpy.dtype("csingle"), + numpy.dtype("cdouble"), + numpy.dtype("clongdouble"), + ] - _file_formats = ['varian', 'bruker', None] + _file_formats = ["varian", "bruker", None] def __init__(self, *args, **kwargs): - self.id = kwargs.get('id', None) - self._procpar = kwargs.get('procpar', None) + self.id = kwargs.get("id", None) + self._procpar = kwargs.get("procpar", None) self._params = None - self.fid_path = kwargs.get('fid_path', '.') + self.fid_path = kwargs.get("fid_path", ".") self._file_format = None + # self.parameters_object = self.lib.Parameters() + + @property + def lib(self): + try: + self.__lib + except AttributeError: + self.__lib = DataModel.from_markdown( + path=Path(__file__).parent.parent / "specifications" + ) + return self.__lib + 
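For orientation, the `lib` property added above memoises the sdRDM data model that the (currently commented-out) `Parameters` call and the later `FID`/`NMRpy` calls in this diff rely on. A minimal usage sketch — assuming `specifications/` holds the markdown model, and using `FID` as one of the generated classes, mirroring `Fid.__init__` below:

```python
# Sketch only: the first access parses the markdown specifications,
# subsequent accesses reuse the cached model object.
fid = Fid.from_data([0.0, 1.0, 2.0])
lib = fid.lib            # parses specifications/ once
fid_object = lib.FID()   # generated class, as instantiated in Fid.__init__
assert fid.lib is lib    # cached on the instance thereafter
```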
+ # @property + # def parameters_object(self): + # return self.__parameters_object + + # @parameters_object.setter + # def parameters_object(self, parameters_object): + # if isinstance(parameters_object, DataModel): + # self.__parameters_object = parameters_object @property def id(self): @@ -35,8 +59,8 @@ def id(self, id): if isinstance(id, str) or id is None: self.__id = id else: - raise AttributeError('ID must be a string or None.') - + raise AttributeError("ID must be a string or None.") + @property def fid_path(self): return self.__fid_path @@ -46,7 +70,7 @@ def fid_path(self, fid_path): if isinstance(fid_path, str): self.__fid_path = fid_path else: - raise AttributeError('fid_path must be a string.') + raise AttributeError("fid_path must be a string.") @property def _file_format(self): @@ -57,7 +81,9 @@ def _file_format(self, file_format): if file_format in self._file_formats: self.__file_format = file_format else: - raise AttributeError('_file_format must be "varian", "bruker", or None.') + raise AttributeError( + '_file_format must be "varian", "bruker", or None.' + ) @classmethod def _is_iter(cls, i): @@ -90,12 +116,30 @@ def _procpar(self): @_procpar.setter def _procpar(self, procpar): if procpar is None: - self.__procpar = procpar + self.__procpar = procpar elif isinstance(procpar, dict): - self.__procpar = procpar + self.__procpar = procpar self._params = self._extract_procpar(procpar) + # self.parameters_object( + # acquisition_time=self._params.get("at"), + # relaxation_time=self._params.get("d1"), + # repetition_time=self._params.get("rt"), + # spectral_width_ppm=self._params.get("sw"), + # spectral_width_hz=self._params.get("sw_hz"), + # spectrometer_frequency=self._params.get("sfrq"), + # reference_frequency=self._params.get("reffrq"), + # spectral_width_left=self._params.get("sw_left"), + # ) + # for _ in self._params.get("nt"): + # if _ is not None: + # self.fid_object.parameters.number_of_transients.append(_) + # for _ in self._params.get("acqtime"): + # if _ is not None: + # self.fid_object.parameters.acquisition_times_array.append( + # _ + # ) else: - raise AttributeError('procpar must be a dictionary or None.') + raise AttributeError("procpar must be a dictionary or None.") @property def _params(self): @@ -106,16 +150,16 @@ def _params(self, params): if isinstance(params, dict) or params is None: self.__params = params else: - raise AttributeError('params must be a dictionary or None.') + raise AttributeError("params must be a dictionary or None.") - #processing + # processing def _extract_procpar(self, procpar): - if self._file_format == 'bruker': + if self._file_format == "bruker": return self._extract_procpar_bruker(procpar) - elif self._file_format == 'varian': + elif self._file_format == "varian": return self._extract_procpar_varian(procpar) - #else: - # raise AttributeError('Could not parse procpar.') + # else: + # raise AttributeError('Could not parse procpar.') @staticmethod def _extract_procpar_varian(procpar): """ Extract some commonly-used NMR parameters (using Varian denotations) and return a parameter dictionary 'params'. 
""" - at = float(procpar['procpar']['at']['values'][0]) - d1 = float(procpar['procpar']['d1']['values'][0]) - sfrq = float(procpar['procpar']['sfrq']['values'][0]) - reffrq = float(procpar['procpar']['reffrq']['values'][0]) - rfp = float(procpar['procpar']['rfp']['values'][0]) - rfl = float(procpar['procpar']['rfl']['values'][0]) - tof = float(procpar['procpar']['tof']['values'][0]) - rt = at+d1 + at = float(procpar["procpar"]["at"]["values"][0]) + d1 = float(procpar["procpar"]["d1"]["values"][0]) + sfrq = float(procpar["procpar"]["sfrq"]["values"][0]) + reffrq = float(procpar["procpar"]["reffrq"]["values"][0]) + rfp = float(procpar["procpar"]["rfp"]["values"][0]) + rfl = float(procpar["procpar"]["rfl"]["values"][0]) + tof = float(procpar["procpar"]["tof"]["values"][0]) + rt = at + d1 nt = numpy.array( - [procpar['procpar']['nt']['values']], dtype=int).flatten() + [procpar["procpar"]["nt"]["values"]], dtype=int + ).flatten() acqtime = numpy.zeros(nt.shape) - acqtime[0] = (rt * nt[0] / 2) + acqtime[0] = rt * nt[0] / 2 for i in range(1, len(nt)): acqtime[i] = acqtime[i - 1] + (nt[i - 1] + nt[i]) / 2 * rt - acqtime /= 60. # convert to min - sw_hz = float(procpar['procpar']['sw']['values'][0]) - sw = round(sw_hz/reffrq, 2) - sw_left = (0.5+1e6*(sfrq-reffrq)/sw_hz)*sw_hz/sfrq + acqtime /= 60.0 # convert to min + sw_hz = float(procpar["procpar"]["sw"]["values"][0]) + sw = round(sw_hz / reffrq, 2) + sw_left = (0.5 + 1e6 * (sfrq - reffrq) / sw_hz) * sw_hz / sfrq params = dict( - at=at, - d1=d1, - rt=rt, - nt=nt, - acqtime=acqtime, - sw=sw, - sw_hz=sw_hz, - sfrq=sfrq, - reffrq=reffrq, - rfp=rfp, - rfl=rfl, - tof=tof, - sw_left=sw_left, - ) + at=at, # acquisition time + d1=d1, # relaxation delay + rt=rt, # repetition time (at+d1) + nt=nt, # number of transients + acqtime=acqtime, # acquisition times array (nt, 2nt, .., ntxrt) + sw=sw, # spectral width / ppm + sw_hz=sw_hz, # sw / Hz + sfrq=sfrq, # spectrometer frequency + reffrq=reffrq, # reference frequency + rfp=rfp, # irrelevant + rfl=rfl, # irrelevant + tof=tof, # irrelevant + sw_left=sw_left, # spectral window left + ) return params @staticmethod - def _extract_procpar_bruker(procpar): + def _extract_procpar_bruker(procpar): """ Extract some commonly-used NMR parameters (using Bruker denotations) and return a parameter dictionary 'params'. 
""" - d1 = procpar['acqus']['D'][1] - reffrq = procpar['acqus']['SFO1'] - nt = procpar['acqus']['NS'] - sw_hz = procpar['acqus']['SW_h'] - sw = procpar['acqus']['SW'] + d1 = procpar["acqus"]["D"][1] + reffrq = procpar["acqus"]["SFO1"] + nt = procpar["acqus"]["NS"] + sw_hz = procpar["acqus"]["SW_h"] + sw = procpar["acqus"]["SW"] # lefthand offset of the processed data in ppm - if 'procs' in procpar: - sfrq = procpar['procs']['SF'] - sw_left = procpar['procs']['OFFSET'] + if "procs" in procpar: + sfrq = procpar["procs"]["SF"] + sw_left = procpar["procs"]["OFFSET"] else: - sfrq = procpar['acqus']['BF1'] - sw_left = (0.5+1e6*(sfrq-reffrq)/sw_hz)*sw_hz/sfrq - at = procpar['acqus']['TD']/(2*sw_hz) - rt = at+d1 - td = procpar['tdelta'] - cumulative = procpar['tcum'] - single = procpar['tsingle'] - tstart = cumulative - 0.5*single # tstart for acquisition - al = procpar['arraylength'] - a = procpar['arrayset'] + sfrq = procpar["acqus"]["BF1"] + sw_left = (0.5 + 1e6 * (sfrq - reffrq) / sw_hz) * sw_hz / sfrq + at = procpar["acqus"]["TD"] / (2 * sw_hz) + rt = at + d1 + td = procpar["tdelta"] + cumulative = procpar["tcum"] + single = procpar["tsingle"] + tstart = cumulative - 0.5 * single # tstart for acquisition + al = procpar["arraylength"] + a = procpar["arrayset"] acqtime = numpy.zeros((al)) - acqtime[0] = tstart[a-1] + acqtime[0] = tstart[a - 1] for i in range(1, al): - acqtime[i] = acqtime[i-1] + td + acqtime[i] = acqtime[i - 1] + td params = dict( at=at, d1=d1, @@ -199,18 +244,20 @@ def _extract_procpar_bruker(procpar): sfrq=sfrq, reffrq=reffrq, sw_left=sw_left, - ) + ) return params + class Fid(Base): - ''' + """ The basic FID (Free Induction Decay) class contains all the data for a single spectrum (:attr:`~nmrpy.data_objects.Fid.data`), and the necessary methods to process these data. - ''' + """ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.data = kwargs.get('data', []) + self.fid_object = self.lib.FID() + self.data = kwargs.get("data", []) self.peaks = None self.ranges = None self._deconvoluted_peaks = None @@ -218,9 +265,17 @@ def __init__(self, *args, **kwargs): "ft": False, } - def __str__(self): - return 'FID: %s (%i data)'%(self.id, len(self.data)) + return "FID: %s (%i data)" % (self.id, len(self.data)) + + @property + def fid_object(self): + return self.__fid_object + + @fid_object.setter + def fid_object(self, fid_object): + if isinstance(fid_object, DataModel): + self.__fid_object = fid_object @property def data(self): @@ -228,11 +283,14 @@ def data(self): The spectral data. This is the primary object upon which the processing and analysis functions work. """ return self.__data - - @data.setter + + @data.setter def data(self, data): if Fid._is_valid_dataset(data): self.__data = numpy.array(data) + # for _ in self.__data: + # if type(_) is not None: + # self.fid_object.data.append(float(_)) @property def _ppm(self): @@ -240,7 +298,11 @@ def _ppm(self): Index of :attr:`~nmrpy.data_objects.Fid.data` in ppm (parts per million). """ if self._params is not None and self.data is not None: - return numpy.linspace(self._params['sw_left']-self._params['sw'], self._params['sw_left'], len(self.data))[::-1] + return numpy.linspace( + self._params["sw_left"] - self._params["sw"], + self._params["sw_left"], + len(self.data), + )[::-1] else: return None @@ -250,14 +312,14 @@ def peaks(self): Picked peaks for deconvolution of :attr:`~nmrpy.data_objects.Fid.data`. 
""" return self._peaks - - @peaks.setter + + @peaks.setter def peaks(self, peaks): if peaks is not None: if not Fid._is_flat_iter(peaks): - raise AttributeError('peaks must be a flat iterable') + raise AttributeError("peaks must be a flat iterable") if not all(isinstance(i, numbers.Number) for i in peaks): - raise AttributeError('peaks must be numbers') + raise AttributeError("peaks must be numbers") self._peaks = numpy.array(peaks) else: self._peaks = peaks @@ -268,34 +330,40 @@ def ranges(self): Picked ranges for deconvolution of :attr:`~nmrpy.data_objects.Fid.data`. """ return self._ranges - - @ranges.setter + + @ranges.setter def ranges(self, ranges): if ranges is None: self._ranges = None return if not Fid._is_iter_of_iters(ranges) or ranges is None: - raise AttributeError('ranges must be an iterable of iterables or None') + raise AttributeError( + "ranges must be an iterable of iterables or None" + ) ranges = numpy.array(ranges) if ranges.shape[1] != 2: - raise AttributeError('ranges must be an iterable of 2-length iterables or an empty iterables e.g. [[]]') + raise AttributeError( + "ranges must be an iterable of 2-length iterables or an empty iterables e.g. [[]]" + ) for r in ranges: if not all(isinstance(i, numbers.Number) for i in r): - raise AttributeError('ranges must be numbers') + raise AttributeError("ranges must be numbers") self._ranges = ranges @property def _bl_ppm(self): return self.__bl_ppm - - @_bl_ppm.setter + + @_bl_ppm.setter def _bl_ppm(self, bl_ppm): if bl_ppm is not None: if not Fid._is_flat_iter(bl_ppm): - raise AttributeError('baseline indices must be a flat iterable') + raise AttributeError( + "baseline indices must be a flat iterable" + ) if len(bl_ppm) > 0: if not all(isinstance(i, numbers.Number) for i in bl_ppm): - raise AttributeError('baseline indices must be numbers') + raise AttributeError("baseline indices must be numbers") self.__bl_ppm = numpy.sort(list(set(bl_ppm)))[::-1] else: self.__bl_ppm = None @@ -305,21 +373,28 @@ def _bl_ppm(self, bl_ppm): @property def _bl_indices(self): if self._bl_ppm is not None: - return self._conv_to_index(self.data, self._bl_ppm, self._params['sw_left'], self._params['sw']) + return self._conv_to_index( + self.data, + self._bl_ppm, + self._params["sw_left"], + self._params["sw"], + ) else: return None @property def _bl_poly(self): return self.__bl_poly - - @_bl_poly.setter + + @_bl_poly.setter def _bl_poly(self, bl_poly): if bl_poly is not None: if not Fid._is_flat_iter(bl_poly): - raise AttributeError('baseline polynomial must be a flat iterable') + raise AttributeError( + "baseline polynomial must be a flat iterable" + ) if not all(isinstance(i, numbers.Number) for i in bl_poly): - raise AttributeError('baseline polynomial must be numbers') + raise AttributeError("baseline polynomial must be numbers") self.__bl_poly = numpy.array(bl_poly) else: self.__bl_ppm = bl_poly @@ -330,9 +405,14 @@ def _index_peaks(self): :attr:`~nmrpy.data_objects.Fid.peaks` converted to indices rather than ppm """ if self.peaks is not None: - return self._conv_to_index(self.data, self.peaks, self._params['sw_left'], self._params['sw']) + return self._conv_to_index( + self.data, + self.peaks, + self._params["sw_left"], + self._params["sw"], + ) else: - return [] + return [] @property def _index_ranges(self): @@ -341,10 +421,15 @@ def _index_ranges(self): """ if self.ranges is not None: shp = self.ranges.shape - index_ranges = self._conv_to_index(self.data, self.ranges.flatten(), self._params['sw_left'], self._params['sw']) + index_ranges = 
self._conv_to_index( + self.data, + self.ranges.flatten(), + self._params["sw_left"], + self._params["sw"], + ) return index_ranges.reshape(shp) else: - return [] + return [] @property def _grouped_peaklist(self): @@ -352,18 +437,37 @@ def _grouped_peaklist(self): :attr:`~nmrpy.data_objects.Fid.peaks` grouped according to :attr:`~nmrpy.data_objects.Fid.ranges` """ if self.ranges is not None: - return numpy.array([[peak for peak in self.peaks if peak > min(peak_range) and peak < max(peak_range)] - for peak_range in self.ranges], dtype=object) + return numpy.array( + [ + [ + peak + for peak in self.peaks + if peak > min(peak_range) and peak < max(peak_range) + ] + for peak_range in self.ranges + ], + dtype=object, + ) else: return [] + @property def _grouped_index_peaklist(self): """ :attr:`~nmrpy.data_objects.Fid._index_peaks` grouped according to :attr:`~nmrpy.data_objects.Fid._index_ranges` """ if self._index_ranges is not None: - return numpy.array([[peak for peak in self._index_peaks if peak > min(peak_range) and peak < max(peak_range)] - for peak_range in self._index_ranges], dtype=object) + return numpy.array( + [ + [ + peak + for peak in self._index_peaks + if peak > min(peak_range) and peak < max(peak_range) + ] + for peak_range in self._index_ranges + ], + dtype=object, + ) else: return [] @@ -375,17 +479,17 @@ def _deconvoluted_peaks(self): def _deconvoluted_peaks(self, deconvoluted_peaks): """This is a list of lists of peak parameters with the order [offset, gauss_sigma, lorentz_hwhm, amplitude, frac_gauss]: - offset: spectral offset + offset: spectral offset - gauss_sigma: Gaussian sigma + gauss_sigma: Gaussian sigma - lorentz_hwhm: Lorentzian half-width-at-half-maximum + lorentz_hwhm: Lorentzian half-width-at-half-maximum - amplitude: height of peak + amplitude: height of peak - frac_gauss: fraction of peak to be Gaussian (Lorentzian fraction is 1-frac_gauss) - """ - self.__deconvoluted_peaks = deconvoluted_peaks + frac_gauss: fraction of peak to be Gaussian (Lorentzian fraction is 1-frac_gauss) + """ + self.__deconvoluted_peaks = deconvoluted_peaks @property def deconvoluted_integrals(self): @@ -395,16 +499,22 @@ def deconvoluted_integrals(self): if self._deconvoluted_peaks is not None: integrals = [] for peak in self._deconvoluted_peaks: - int_gauss = peak[-1]*Fid._f_gauss_int(peak[3], peak[1]) - int_lorentz = (1-peak[-1])*Fid._f_lorentz_int(peak[3], peak[2]) - integrals.append(int_gauss+int_lorentz) + int_gauss = peak[-1] * Fid._f_gauss_int(peak[3], peak[1]) + int_lorentz = (1 - peak[-1]) * Fid._f_lorentz_int( + peak[3], peak[2] + ) + integrals.append(int_gauss + int_lorentz) return integrals - + def _get_plots(self): """ Return a list of all :class:`~nmrpy.plotting.Plot` objects owned by this :class:`~nmrpy.data_objects.Fid`. """ - plots = [self.__dict__[id] for id in sorted(self.__dict__) if isinstance(self.__dict__[id], Plot)] + plots = [ + self.__dict__[id] + for id in sorted(self.__dict__) + if isinstance(self.__dict__[id], Plot) + ] return plots def _del_plots(self): @@ -420,7 +530,8 @@ def _get_widgets(self): Return a list of all widget objects (peak selection, etc.) owned by this :class:`~nmrpy.data_objects.Fid`. 
""" widgets = [ - id for id in sorted(self.__dict__) + id + for id in sorted(self.__dict__) if isinstance(self.__dict__[id], Phaser) or isinstance(self.__dict__[id], Calibrator) or isinstance(self.__dict__[id], DataPeakSelector) @@ -439,15 +550,14 @@ def _del_widgets(self): @classmethod def _is_valid_dataset(cls, data): if isinstance(data, str): - raise TypeError('Data must be iterable not a string.') + raise TypeError("Data must be iterable not a string.") if not cls._is_iter(data): - raise TypeError('Data must be an iterable.') + raise TypeError("Data must be an iterable.") if not cls._is_flat_iter(data): - raise TypeError('Data must not be nested.') + raise TypeError("Data must not be nested.") if not all(isinstance(i, numbers.Number) for i in data): - raise TypeError('Data must consist of numbers only.') - return True - + raise TypeError("Data must consist of numbers only.") + return True @classmethod def from_data(cls, data): @@ -456,11 +566,11 @@ def from_data(cls, data): Instantiate a new :class:`~nmrpy.data_objects.Fid` object by providing a spectral data object as argument. Eg. :: - fid = Fid.from_data(data) + fid = Fid.from_data(data) """ new_instance = cls() new_instance.data = data - return new_instance + return new_instance def zf(self): """ @@ -472,7 +582,9 @@ def zf(self): in an artificially increased resolution once Fourier-transformed. """ - self.data = numpy.append(self.data, 0*self.data) + self.data = numpy.append(self.data, 0 * self.data) + for _ in self.data: + self.fid_object.data.append(float(_)) def emhz(self, lb=5.0): """ @@ -483,13 +595,24 @@ def emhz(self, lb=5.0): :keyword lb: degree of line-broadening in Hz. """ - self.data = numpy.exp(-numpy.pi*numpy.arange(len(self.data)) * (lb/self._params['sw_hz'])) * self.data + self.data = ( + numpy.exp( + -numpy.pi + * numpy.arange(len(self.data)) + * (lb / self._params["sw_hz"]) + ) + * self.data + ) + for _ in self.data: + self.fid_object.data.append(float(_)) def real(self): """ Discard imaginary component of :attr:`~nmrpy.data_objects.Fid.data`. """ self.data = numpy.real(self.data) + for _ in self.data: + self.fid_object.data.append(float(_)) # GENERAL FUNCTIONS def ft(self): @@ -502,12 +625,14 @@ def ft(self): series,' Math. Comput. 19: 297-301.*) """ - if self._flags['ft']: - raise ValueError('Data have already been Fourier Transformed.') + if self._flags["ft"]: + raise ValueError("Data have already been Fourier Transformed.") if Fid._is_valid_dataset(self.data): list_params = (self.data, self._file_format) self.data = Fid._ft(list_params) - self._flags['ft'] = True + for _ in self.data: + self.fid_object.data.append(float(_)) + self._flags["ft"] = True @classmethod def _ft(cls, list_params): @@ -516,170 +641,185 @@ def _ft(cls, list_params): list_params is a tuple of (, ). """ if len(list_params) != 2: - raise ValueError('Wrong number of parameters. list_params must contain [, ]') + raise ValueError( + "Wrong number of parameters. 
list_params must contain [<data>, <file_format>]" + ) data, file_format = list_params if Fid._is_valid_dataset(data) and file_format in Fid._file_formats: data = numpy.array(numpy.fft.fft(data), dtype=data.dtype) s = len(data) - if file_format == 'varian' or file_format == None: - ft_data = numpy.append(data[int(s / 2.0):], data[: int(s / 2.0)]) - if file_format == 'bruker': - ft_data = numpy.append(data[int(s / 2.0):: -1], data[s: int(s / 2.0): -1]) + if file_format == "varian" or file_format is None: + ft_data = numpy.append( + data[int(s / 2.0) :], data[: int(s / 2.0)] + ) + if file_format == "bruker": + ft_data = numpy.append( + data[int(s / 2.0) :: -1], data[s : int(s / 2.0) : -1] + ) return ft_data - @staticmethod def _conv_to_ppm(data, index, sw_left, sw): - """ - Convert index array to ppm. - """ - if isinstance(index, list): - index = numpy.array(index) - frc_sw = index/float(len(data)) - ppm = sw_left-sw*frc_sw - if Fid._is_iter(ppm): - return numpy.array([round(i, 2) for i in ppm]) - else: - return round(ppm, 2) + """ + Convert index array to ppm. + """ + if isinstance(index, list): + index = numpy.array(index) + frc_sw = index / float(len(data)) + ppm = sw_left - sw * frc_sw + if Fid._is_iter(ppm): + return numpy.array([round(i, 2) for i in ppm]) + else: + return round(ppm, 2) @staticmethod def _conv_to_index(data, ppm, sw_left, sw): - """ - Convert ppm array to index. - """ - conv_to_int = False - if not Fid._is_iter(ppm): - ppm = [ppm] - conv_to_int = True - if isinstance(ppm, list): - ppm = numpy.array(ppm) - if any(ppm > sw_left) or any(ppm < sw_left-sw): - raise ValueError('ppm must be within spectral width.') - indices = len(data)*(sw_left-ppm)/sw - if conv_to_int: - return int(numpy.ceil(indices)) - return numpy.array(numpy.ceil(indices), dtype=int) - - def phase_correct(self, method='leastsq'): - """ - - Automatically phase-correct :attr:`~nmrpy.data_objects.Fid.data` by minimising - total absolute area. - - :keyword method: The fitting method to use. Default is 'leastsq', the Levenberg-Marquardt algorithm, which is usually sufficient. Additional options include: - - Nelder-Mead (nelder) - - L-BFGS-B (l-bfgs-b) - - Conjugate Gradient (cg) - - Powell (powell) - - Newton-CG (newton) - """ - if self.data.dtype not in self._complex_dtypes: - raise TypeError('Only complex data can be phase-corrected.') - if not self._flags['ft']: - raise ValueError('Only Fourier-transformed data can be phase-corrected.') - print('phasing: %s'%self.id) - self.data = Fid._phase_correct((self.data, method)) + """ + Convert ppm array to index. + """ + conv_to_int = False + if not Fid._is_iter(ppm): + ppm = [ppm] + conv_to_int = True + if isinstance(ppm, list): + ppm = numpy.array(ppm) + if any(ppm > sw_left) or any(ppm < sw_left - sw): + raise ValueError("ppm must be within spectral width.") + indices = len(data) * (sw_left - ppm) / sw + if conv_to_int: + return int(numpy.ceil(indices)) + return numpy.array(numpy.ceil(indices), dtype=int) + 
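The two converters above are inverses of each other up to the `round`/`ceil` calls. A small worked example with illustrative values:

```python
# Sketch: a 1000-point spectrum spanning sw = 10 ppm, left edge at 8 ppm.
data = [0.0] * 1000
sw_left, sw = 8.0, 10.0
# ppm -> index: len(data) * (sw_left - ppm) / sw
assert Fid._conv_to_index(data, 3.0, sw_left, sw) == 500
# index -> ppm: sw_left - sw * index / len(data)
assert Fid._conv_to_ppm(data, 500, sw_left, sw) == 3.0
```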
+ def phase_correct(self, method="leastsq"): + """ + + Automatically phase-correct :attr:`~nmrpy.data_objects.Fid.data` by minimising + total absolute area. + + :keyword method: The fitting method to use. Default is 'leastsq', the Levenberg-Marquardt algorithm, which is usually sufficient. Additional options include: + + Nelder-Mead (nelder) + + L-BFGS-B (l-bfgs-b) + + Conjugate Gradient (cg) + + Powell (powell) + + Newton-CG (newton) + """ + if self.data.dtype not in self._complex_dtypes: + raise TypeError("Only complex data can be phase-corrected.") + if not self._flags["ft"]: + raise ValueError( + "Only Fourier-transformed data can be phase-corrected." + ) + print("phasing: %s" % self.id) + self.data = Fid._phase_correct((self.data, method)) + for _ in self.data: + self.fid_object.data.append(float(_)) @classmethod def _phase_correct(cls, list_params): - """ - Class method for phase-correction using multiprocessing. - list_params is a tuple of (<data>, <method>). - """ - data, method = list_params - p = lmfit.Parameters() - p.add_many( - ('p0', 1.0, True), - ('p1', 0.0, True), - ) - mz = lmfit.minimize(Fid._phased_data_sum, p, args=([data]), method=method) - phased_data = Fid._ps(data, p0=mz.params['p0'].value, p1=mz.params['p1'].value) - if abs(phased_data.min()) > abs(phased_data.max()): - phased_data *= -1 - if sum(phased_data) < 0.0: - phased_data *= -1 - print('%d\t%d'%(mz.params['p0'].value, mz.params['p1'].value)) - return phased_data - + """ + Class method for phase-correction using multiprocessing. + list_params is a tuple of (<data>, <method>). + """ + data, method = list_params + p = lmfit.Parameters() + p.add_many( + ("p0", 1.0, True), + ("p1", 0.0, True), + ) + mz = lmfit.minimize( + Fid._phased_data_sum, p, args=([data]), method=method + ) + phased_data = Fid._ps( + data, p0=mz.params["p0"].value, p1=mz.params["p1"].value + ) + if abs(phased_data.min()) > abs(phased_data.max()): + phased_data *= -1 + if sum(phased_data) < 0.0: + phased_data *= -1 + print("%d\t%d" % (mz.params["p0"].value, mz.params["p1"].value)) + return phased_data + @classmethod def _phased_data_sum(cls, pars, data): - err = Fid._ps(data, p0=pars['p0'].value, p1=pars['p1'].value).real - return numpy.array([abs(err).sum()]*2) + err = Fid._ps(data, p0=pars["p0"].value, p1=pars["p1"].value).real + return numpy.array([abs(err).sum()] * 2) @classmethod def _ps(cls, data, p0=0.0, p1=0.0): - """ - Linear phase correction - - :keyword p0: Zero order phase in degrees. - - :keyword p1: First order phase in degrees. - - """ - if not all(isinstance(i, (float, int)) for i in [p0, p1]): - raise TypeError('p0 and p1 must be floats or ints.') - if not data.dtype in Fid._complex_dtypes: - raise TypeError('data must be complex.') - # convert to radians - p0 = p0*numpy.pi/180.0 - p1 = p1*numpy.pi/180.0 - size = len(data) - ph = numpy.exp(1.0j*(p0+(p1*numpy.arange(size)/size))) - return ph*data + """ + Linear phase correction + + :keyword p0: Zero order phase in degrees. + + :keyword p1: First order phase in degrees. 
+ + """ + if not all(isinstance(i, (float, int)) for i in [p0, p1]): + raise TypeError("p0 and p1 must be floats or ints.") + if not data.dtype in Fid._complex_dtypes: + raise TypeError("data must be complex.") + # convert to radians + p0 = p0 * numpy.pi / 180.0 + p1 = p1 * numpy.pi / 180.0 + size = len(data) + ph = numpy.exp(1.0j * (p0 + (p1 * numpy.arange(size) / size))) + return ph * data def ps(self, p0=0.0, p1=0.0): """ Linear phase correction of :attr:`~nmrpy.data_objects.Fid.data` - + :keyword p0: Zero order phase in degrees :keyword p1: First order phase in degrees - + """ if not all(isinstance(i, (float, int)) for i in [p0, p1]): - raise TypeError('p0 and p1 must be floats or ints.') + raise TypeError("p0 and p1 must be floats or ints.") if not self.data.dtype in self._complex_dtypes: - raise TypeError('data must be complex.') + raise TypeError("data must be complex.") # convert to radians - p0 = p0*numpy.pi/180.0 - p1 = p1*numpy.pi/180.0 + p0 = p0 * numpy.pi / 180.0 + p1 = p1 * numpy.pi / 180.0 size = len(self.data) - ph = numpy.exp(1.0j*(p0+(p1*numpy.arange(size)/size))) - self.data = ph*self.data + ph = numpy.exp(1.0j * (p0 + (p1 * numpy.arange(size) / size))) + self.data = ph * self.data + for _ in self.data: + self.fid_object.data.append(float(_)) def phaser(self): """ Instantiate a phase-correction GUI widget which applies to :attr:`~nmrpy.data_objects.Fid.data`. """ if not len(self.data): - raise AttributeError('data does not exist.') + raise AttributeError("data does not exist.") if self.data.dtype not in self._complex_dtypes: - raise TypeError('data must be complex.') + raise TypeError("data must be complex.") if not Fid._is_flat_iter(self.data): - raise AttributeError('data must be 1 dimensional.') + raise AttributeError("data must be 1 dimensional.") global _phaser_widget self._phaser_widget = Phaser(self) def calibrate(self): """ - Instantiate a GUI widget to select a peak and calibrate spectrum. - Left-clicking selects a peak. The user is then prompted to enter + Instantiate a GUI widget to select a peak and calibrate spectrum. + Left-clicking selects a peak. The user is then prompted to enter the PPM value of that peak for calibration. """ - plot_label = \ -''' + plot_label = """ Left - select peak -''' +""" plot_title = "Calibration {}".format(self.id) - self._calibrate_widget = Calibrator(self, - title=plot_title, - label=plot_label, - ) + self._calibrate_widget = Calibrator( + self, + title=plot_title, + label=plot_label, + ) def baseline_correct(self, deg=2): """ @@ -688,20 +828,22 @@ def baseline_correct(self, deg=2): (stored in :attr:`~nmrpy.data_objects.Fid._bl_ppm`) with polynomial of specified degree (stored in :attr:`~nmrpy.data_objects.Fid._bl_ppm`) and subtract this polynomial from :attr:`~nmrpy.data_objects.Fid.data`. - + :keyword deg: degree of fitted polynomial """ if self._bl_indices is None: - raise AttributeError('No points selected for baseline correction. Run fid.baseliner()') + raise AttributeError( + "No points selected for baseline correction. 
Run fid.baseliner()" + ) if not len(self.data): - raise AttributeError('data does not exist.') + raise AttributeError("data does not exist.") if self.data.dtype in self._complex_dtypes: - raise TypeError('data must not be complex.') + raise TypeError("data must not be complex.") if not Fid._is_flat_iter(self.data): - raise AttributeError('data must be 1 dimensional.') - + raise AttributeError("data must be 1 dimensional.") + data = self.data x = numpy.arange(len(data)) m = numpy.ones_like(x) @@ -712,20 +854,24 @@ def baseline_correct(self, deg=2): p = numpy.ma.polyfit(xm, ym, deg) yp = scipy.polyval(p, x) self._bl_poly = yp - data_bl = data-yp + data_bl = data - yp self.data = numpy.array(data_bl) + for _ in self.data: + self.fid_object.data.append(float(_)) def peakpick(self, thresh=0.1): - """ + """ Attempt to automatically identify peaks. Picked peaks are assigned to :attr:`~nmrpy.data_objects.Fid.peaks`. :keyword thresh: fractional threshold for peak-picking """ - peaks_ind = nmrglue.peakpick.pick(self.data, thresh*self.data.max()) + peaks_ind = nmrglue.peakpick.pick(self.data, thresh * self.data.max()) peaks_ind = [i[0] for i in peaks_ind] - peaks_ppm = Fid._conv_to_ppm(self.data, peaks_ind, self._params['sw_left'], self._params['sw']) + peaks_ppm = Fid._conv_to_ppm( + self.data, peaks_ind, self._params["sw_left"], self._params["sw"] + ) self.peaks = peaks_ppm print(self.peaks) @@ -739,19 +885,19 @@ def peakpicker(self): :meth:`~nmrpy.data_objects.Fid.deconv`). """ - plot_label = \ -''' + plot_label = """ Left - select peak Ctrl+Left - delete nearest peak Drag Right - select range Ctrl+Right - delete range Ctrl+Alt+Right - assign -''' +""" plot_title = "Peak-picking {}".format(self.id) - self._peakpicker_widget = DataPeakSelector(self, - title=plot_title, - label=plot_label, - ) + self._peakpicker_widget = DataPeakSelector( + self, + title=plot_title, + label=plot_label, + ) def clear_peaks(self): """ @@ -774,48 +920,62 @@ def baseliner(self): :meth:`~nmrpy.data_objects.Fid.baseline_correction`). 
""" - plot_label = \ -''' + plot_label = """ Drag Right - select range Ctrl+Right - delete range Ctrl+Alt+Right - assign -''' +""" plot_title = "Baseline correction {}".format(self.id) - self._baseliner_widget = FidRangeSelector(self, - title=plot_title, - label=plot_label, - ) - + self._baseliner_widget = FidRangeSelector( + self, + title=plot_title, + label=plot_label, + ) + @classmethod def _f_gauss(cls, offset, amplitude, gauss_sigma, x): - return amplitude*numpy.exp(-((offset-x)**2.0)/(2.0*gauss_sigma**2.0)) - + return amplitude * numpy.exp( + -((offset - x) ** 2.0) / (2.0 * gauss_sigma**2.0) + ) + @classmethod def _f_lorentz(cls, offset, amplitude, lorentz_hwhm, x): - #return amplitude*lorentz_hwhm**2.0/(lorentz_hwhm**2.0+4.0*(offset-x)**2.0) - return amplitude*lorentz_hwhm**2.0/(lorentz_hwhm**2.0+(x-offset)**2.0) + # return amplitude*lorentz_hwhm**2.0/(lorentz_hwhm**2.0+4.0*(offset-x)**2.0) + return ( + amplitude + * lorentz_hwhm**2.0 + / (lorentz_hwhm**2.0 + (x - offset) ** 2.0) + ) @classmethod def _f_gauss_int(cls, amplitude, gauss_sigma): - return amplitude*numpy.sqrt(2.0*numpy.pi*gauss_sigma**2.0) + return amplitude * numpy.sqrt(2.0 * numpy.pi * gauss_sigma**2.0) @classmethod def _f_lorentz_int(cls, amplitude, lorentz_hwhm): - #empirical integral commented out - #x = numpy.arange(1000*lorentz_hwhm) - #return numpy.sum(amplitude*lorentz_hwhm**2.0/(lorentz_hwhm**2.0+(x-len(x)/2)**2.0)) - #this integral forumula from http://magicplot.com/wiki/fit_equations - return amplitude*lorentz_hwhm*numpy.pi + # empirical integral commented out + # x = numpy.arange(1000*lorentz_hwhm) + # return numpy.sum(amplitude*lorentz_hwhm**2.0/(lorentz_hwhm**2.0+(x-len(x)/2)**2.0)) + # this integral forumula from http://magicplot.com/wiki/fit_equations + return amplitude * lorentz_hwhm * numpy.pi @classmethod - def _f_pk(cls, x, offset=0.0, gauss_sigma=1.0, lorentz_hwhm=1.0, amplitude=1.0, frac_gauss=0.0): + def _f_pk( + cls, + x, + offset=0.0, + gauss_sigma=1.0, + lorentz_hwhm=1.0, + amplitude=1.0, + frac_gauss=0.0, + ): """ Return the a combined Gaussian/Lorentzian peakshape for deconvolution of :attr:`~nmrpy.data_objects.Fid.data`. - + :arg x: array of equal length to :attr:`~nmrpy.data_objects.Fid.data` - + :keyword offset: spectral offset in x @@ -827,39 +987,37 @@ def _f_pk(cls, x, offset=0.0, gauss_sigma=1.0, lorentz_hwhm=1.0, amplitude=1.0, :keyword frac_gauss: fraction of function to be Gaussian (0 -> 1). Note: specifying a Gaussian fraction of 0 will produce a pure Lorentzian and vice - versa. """ - - #validation + versa.""" + + # validation parameters = [offset, gauss_sigma, lorentz_hwhm, amplitude, frac_gauss] if not all(isinstance(i, numbers.Number) for i in parameters): - raise TypeError('Keyword parameters must be numbers.') + raise TypeError("Keyword parameters must be numbers.") if not cls._is_iter(x): - raise TypeError('x must be an iterable') + raise TypeError("x must be an iterable") if not isinstance(x, numpy.ndarray): - x = numpy.array(x) + x = numpy.array(x) if frac_gauss > 1.0: frac_gauss = 1.0 if frac_gauss < 0.0: frac_gauss = 0.0 - + gauss_peak = cls._f_gauss(offset, amplitude, gauss_sigma, x) lorentz_peak = cls._f_lorentz(offset, amplitude, lorentz_hwhm, x) - peak = frac_gauss*gauss_peak + (1-frac_gauss)*lorentz_peak - - return peak - + peak = frac_gauss * gauss_peak + (1 - frac_gauss) * lorentz_peak + return peak @classmethod def _f_makep(cls, data, peaks, frac_gauss=None): """ Make a set of initial peak parameters for deconvolution. 
 @classmethod def _f_makep(cls, data, peaks, frac_gauss=None): """ Make a set of initial peak parameters for deconvolution. - + :arg data: data to be fitted :arg peaks: selected peak positions (see peakpicker()) - + :returns: an array of peaks, each consisting of the following parameters: spectral offset (x) gauss: 2*sigma**2 lorentz: scale (HWHM) amplitude: amplitude of peak frac_gauss: fraction of function to be Gaussian (0 -> 1) """ if not cls._is_flat_iter(data): - raise TypeError('data must be a flat iterable') + raise TypeError("data must be a flat iterable") if not cls._is_flat_iter(peaks): - raise TypeError('peaks must be a flat iterable') + raise TypeError("peaks must be a flat iterable") if not isinstance(data, numpy.ndarray): - data = numpy.array(data) - + data = numpy.array(data) + p = [] for i in peaks: - pamp = 0.9*abs(data[int(i)]) + pamp = 0.9 * abs(data[int(i)]) single_peak = [i, 10, 0.1, pamp, frac_gauss] p.append(single_peak) return numpy.array(p) @@ -890,95 +1048,99 @@ def _f_conv(cls, parameterset_list, data): """ Returns the offset between the maximum of the convolution of an initial set of lineshapes with the data to be fitted and the maximum of the lineshape autoconvolution. - - parameterset_list -- a list of parameter lists: n*[[spectral offset (x), - gauss: 2*sigma**2, - lorentz: scale (HWHM), - amplitude: amplitude of peak, + + parameterset_list -- a list of parameter lists: n*[[spectral offset (x), + gauss: 2*sigma**2, + lorentz: scale (HWHM), + amplitude: amplitude of peak, frac_gauss: fraction of function to be Gaussian (0 -> 1)]] where n is the number of peaks data -- 1D spectral array - + """ if not cls._is_flat_iter(data): - raise TypeError('data must be a flat iterable') + raise TypeError("data must be a flat iterable") if not cls._is_iter(parameterset_list): - raise TypeError('parameterset_list must be an iterable') + raise TypeError("parameterset_list must be an iterable") if not isinstance(data, numpy.ndarray): - data = numpy.array(data) - + data = numpy.array(data) + data[data == 0.0] = 1e-6 - x = numpy.arange(len(data), dtype='f8') + x = numpy.arange(len(data), dtype="f8") peaks_init = cls._f_pks(parameterset_list, x) data_convolution = numpy.convolve(data, peaks_init[::-1]) auto_convolution = numpy.convolve(peaks_init, peaks_init[::-1]) - max_data_convolution = numpy.where(data_convolution == data_convolution.max())[0][0] - max_auto_convolution = numpy.where(auto_convolution == auto_convolution.max())[0][0] + max_data_convolution = numpy.where( + data_convolution == data_convolution.max() + )[0][0] + max_auto_convolution = numpy.where( + auto_convolution == auto_convolution.max() + )[0][0] return max_data_convolution - max_auto_convolution - @classmethod + @classmethod def _f_pks_list(cls, parameterset_list, x): """ Return a list of peak evaluations for deconvolution. See _f_pk(). 
- + Keyword arguments: - parameterset_list -- a list of parameter lists: [spectral offset (x), - gauss: 2*sigma**2, - lorentz: scale (HWHM), - amplitude: amplitude of peak, + parameterset_list -- a list of parameter lists: [spectral offset (x), + gauss: 2*sigma**2, + lorentz: scale (HWHM), + amplitude: amplitude of peak, frac_gauss: fraction of function to be Gaussian (0 -> 1)] x -- array of equal length to FID """ if not cls._is_iter_of_iters(parameterset_list): - raise TypeError('Parameter set must be an iterable of iterables') + raise TypeError("Parameter set must be an iterable of iterables") for p in parameterset_list: if not cls._is_iter(p): - raise TypeError('Parameter set must be an iterable') + raise TypeError("Parameter set must be an iterable") if not all(isinstance(i, numbers.Number) for i in p): - raise TypeError('Keyword parameters must be numbers.') + raise TypeError("Keyword parameters must be numbers.") if not cls._is_iter(x): - raise TypeError('x must be an iterable') + raise TypeError("x must be an iterable") if not isinstance(x, numpy.ndarray): - x = numpy.array(x) + x = numpy.array(x) return numpy.array([Fid._f_pk(x, *peak) for peak in parameterset_list]) - - @classmethod + @classmethod def _f_pks(cls, parameterset_list, x): """ Return the sum of a series of peak evaluations for deconvolution. See _f_pk(). - + Keyword arguments: - parameterset_list -- a list of parameter lists: [spectral offset (x), - gauss: 2*sigma**2, - lorentz: scale (HWHM), - amplitude: amplitude of peak, + parameterset_list -- a list of parameter lists: [spectral offset (x), + gauss: 2*sigma**2, + lorentz: scale (HWHM), + amplitude: amplitude of peak, frac_gauss: fraction of function to be Gaussian (0 -> 1)] x -- array of equal length to FID """ - + if not cls._is_iter_of_iters(parameterset_list): - raise TypeError('Parameter set must be an iterable of iterables') + raise TypeError("Parameter set must be an iterable of iterables") for p in parameterset_list: if not cls._is_iter(p): - raise TypeError('Parameter set must be an iterable') + raise TypeError("Parameter set must be an iterable") if not all(isinstance(i, numbers.Number) for i in p): - raise TypeError('Keyword parameters must be numbers.') + raise TypeError("Keyword parameters must be numbers.") if not cls._is_iter(x): - raise TypeError('x must be an iterable') + raise TypeError("x must be an iterable") if not isinstance(x, numpy.ndarray): - x = numpy.array(x) - - peaks = x*0.0 + x = numpy.array(x) + + peaks = x * 0.0 for p in parameterset_list: - peak = cls._f_pk(x, - offset=p[0], - gauss_sigma=p[1], - lorentz_hwhm=p[2], - amplitude=p[3], - frac_gauss=p[4], - ) + peak = cls._f_pk( + x, + offset=p[0], + gauss_sigma=p[1], + lorentz_hwhm=p[2], + amplitude=p[3], + frac_gauss=p[4], + ) peaks += peak return peaks
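`_f_res` and `_f_fitp` (below) exchange peak parameters through a flat `lmfit.Parameters` namespace, one `<name>_<peak index>` entry per parameter. A sketch of how two picked peaks would be encoded (illustrative values; `_parameters_to_list` recovers the nested form):

```python
import lmfit

params = lmfit.Parameters()
for i, (offset, amplitude) in enumerate([(120.0, 5.0), (310.0, 2.5)]):
    params.add("offset_%i" % i, value=offset, min=0.0)
    params.add("sigma_%i" % i, value=10.0, min=0.0)
    params.add("hwhm_%i" % i, value=0.1, min=0.0)
    params.add("amplitude_%i" % i, value=amplitude, min=0.0)
    params.add("frac_gauss_%i" % i, value=0.5, min=0.0, max=1.0)
# Fid._parameters_to_list(params) -> [[offset, sigma, hwhm, amplitude, frac_gauss], ...]
```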
 @@ -986,9 +1148,9 @@ def _f_res(cls, p, data): """ Objective function for deconvolution. Returns residuals of the deconvolution fit. - + x -- array of equal length to FID - + Keyword arguments: p -- lmfit parameters object: offset_n -- spectral offset in x gauss_n -- gaussian width lorentz_n -- lorentzian width (hwhm) amplitude_n -- amplitude of peak frac_gauss_n -- fraction of function to be Gaussian (0 -> 1) where n is the peak number (zero-indexed) data -- spectrum array - + """ if not isinstance(p, lmfit.parameter.Parameters): - raise TypeError('Parameters must be of type lmfit.parameter.Parameters.') + raise TypeError( + "Parameters must be of type lmfit.parameter.Parameters." + ) if not cls._is_flat_iter(data): - raise TypeError('data must be a flat iterable.') + raise TypeError("data must be a flat iterable.") if not isinstance(data, numpy.ndarray): - data = numpy.array(data) - + data = numpy.array(data) + params = Fid._parameters_to_list(p) - x = numpy.arange(len(data), dtype='f8') - res = data-cls._f_pks(params, x) + x = numpy.arange(len(data), dtype="f8") + res = data - cls._f_pks(params, x) return res @classmethod - def _f_fitp(cls, data, peaks, frac_gauss=None, method='leastsq'): + def _f_fitp(cls, data, peaks, frac_gauss=None, method="leastsq"): """Fit a section of spectral data with a combination of Gaussian/Lorentzian peaks for deconvolution. - + Keyword arguments: peaks -- selected peak positions (see peakpicker()) frac_gauss -- fraction of fitted function to be Gaussian (1 - Gaussian, 0 - Lorentzian) - + returns: fits -- list of fitted peak parameter sets - + Note: peaks are fitted by default using the Levenberg-Marquardt algorithm[1]. Other fitting algorithms are available (http://cars9.uchicago.edu/software/python/lmfit/fitting.html#choosing-different-fitting-methods). - + [1] Marquardt, Donald W. 'An algorithm for least-squares estimation of nonlinear parameters.' Journal of the Society for Industrial & Applied Mathematics 11.2 (1963): 431-441. """ data = numpy.real(data) if not cls._is_flat_iter(data): - raise TypeError('data must be a flat iterable') + raise TypeError("data must be a flat iterable") if not cls._is_flat_iter(peaks): - raise TypeError('peaks must be a flat iterable') - if any(peak > (len(data)-1) for peak in peaks): - raise ValueError('peaks must be within the length of data.') + raise TypeError("peaks must be a flat iterable") + if any(peak > (len(data) - 1) for peak in peaks): + raise ValueError("peaks must be within the length of data.") if not isinstance(data, numpy.ndarray): - data = numpy.array(data) + data = numpy.array(data) p = cls._f_makep(data, peaks, frac_gauss=0.5) init_ref = cls._f_conv(p, data) - if any(peaks+init_ref < 0) or any(peaks+init_ref > len(data)-1): - init_ref = 0 - if frac_gauss==None: - p = cls._f_makep(data, peaks+init_ref, frac_gauss=0.5) + if any(peaks + init_ref < 0) or any(peaks + init_ref > len(data) - 1): + init_ref = 0 + if frac_gauss is None: + p = cls._f_makep(data, peaks + init_ref, frac_gauss=0.5) else: - p = cls._f_makep(data, peaks+init_ref, frac_gauss=frac_gauss) - + p = cls._f_makep(data, peaks + init_ref, frac_gauss=frac_gauss) + params = lmfit.Parameters() for parset in range(len(p)): - current_parset = dict(zip(['offset', 'sigma', 'hwhm', 'amplitude', 'frac_gauss'], p[parset])) - for k,v in current_parset.items(): - par_name = '%s_%i'%(k, parset) - params.add(name=par_name, - value=v, - vary=True, - min=0.0) - if 'offset' in par_name: - params[par_name].max = len(data)-1 - if 'frac_gauss' in par_name: + current_parset = dict( + zip( + ["offset", "sigma", "hwhm", "amplitude", "frac_gauss"], + p[parset], + ) + ) + for k, v in current_parset.items(): + par_name = "%s_%i" % (k, parset) + params.add(name=par_name, value=v, vary=True, min=0.0) + if "offset" in par_name: + params[par_name].max = len(data) - 1 + if "frac_gauss" in par_name: params[par_name].max = 1.0 if frac_gauss is not None: params[par_name].vary = False - #if 'sigma' in par_name or 'hwhm' in par_name: - # params[par_name].max = 0.01*current_parset['amplitude'] + # if 'sigma' in par_name or 'hwhm' in par_name: + # params[par_name].max = 
0.01*current_parset['amplitude'] + if "amplitude" in par_name: + params[par_name].max = 2.0 * data.max() + try: - mz = lmfit.minimize(cls._f_res, params, args=([data]), method=method) + mz = lmfit.minimize( + cls._f_res, params, args=([data]), method=method + ) fits = Fid._parameters_to_list(mz.params) except: fits = None @@ -1074,47 +1242,60 @@ @classmethod def _parameters_to_list(cls, p): - n_pks = int(len(p)/5) + n_pks = int(len(p) / 5) params = [] for i in range(n_pks): - current_params = [p['%s_%s'%(par, i)].value for par in ['offset', 'sigma', 'hwhm', 'amplitude', 'frac_gauss']] + current_params = [ + p["%s_%s" % (par, i)].value + for par in [ + "offset", + "sigma", + "hwhm", + "amplitude", + "frac_gauss", + ] + ] params.append(current_params) return params - @classmethod def _deconv_datum(cls, list_parameters): if len(list_parameters) != 5: - raise ValueError('list_parameters must consist of five objects.') - if (type(list_parameters[1]) == list and len(list_parameters[1]) == 0) or \ - (type(list_parameters[2]) == list and len(list_parameters[2]) == 0): + raise ValueError("list_parameters must consist of five objects.") + if ( + type(list_parameters[1]) == list and len(list_parameters[1]) == 0 + ) or ( + type(list_parameters[2]) == list and len(list_parameters[2]) == 0 + ): return [] datum, peaks, ranges, frac_gauss, method = list_parameters if not cls._is_iter_of_iters(ranges): - raise TypeError('ranges must be an iterable of iterables') + raise TypeError("ranges must be an iterable of iterables") if not all(len(rng) == 2 for rng in ranges): - raise ValueError('ranges must contain two values.') + raise ValueError("ranges must contain two values.") if not all(rng[0] != rng[1] for rng in ranges): - raise ValueError('data_index must contain different values.') + raise ValueError("data_index must contain different values.") if not isinstance(datum, numpy.ndarray): - datum = numpy.array(datum) + datum = numpy.array(datum) if datum.dtype in cls._complex_dtypes: - raise TypeError('data must be not be complex.') + raise TypeError("data must not be complex.") fit = [] for j in zip(peaks, ranges): - d_slice = datum[j[1][0]:j[1][1]] - p_slice = j[0]-j[1][0] - f = cls._f_fitp(d_slice, p_slice, frac_gauss=frac_gauss, method=method) + d_slice = datum[j[1][0] : j[1][1]] + p_slice = j[0] - j[1][0] + f = cls._f_fitp( + d_slice, p_slice, frac_gauss=frac_gauss, method=method + ) f = numpy.array(f).transpose() f[0] += j[1][0] f = f.transpose() fit.append(f) return fit - def deconv(self, method='leastsq', frac_gauss=0.0): + def deconv(self, method="leastsq", frac_gauss=0.0): """ Deconvolute :attr:`~nmrpy.data_objects.Fid.data` object by fitting a @@ -1126,32 +1307,39 @@ :keyword frac_gauss: (0-1) determines the Gaussian fraction of the peaks. Setting this argument to None will fit this parameter as well. :keyword method: The fitting method to use. Default is 'leastsq', the Levenberg-Marquardt algorithm, which is usually sufficient. 
Additional options include: - + Nelder-Mead (nelder) - + L-BFGS-B (l-bfgs-b) - + Conjugate Gradient (cg) - + Powell (powell) - + Newton-CG (newton) - + """ if not len(self.data): - raise AttributeError('data does not exist.') + raise AttributeError("data does not exist.") if self.data.dtype in self._complex_dtypes: - raise TypeError('data must be not be complex.') + raise TypeError("data must not be complex.") if self.peaks is None: - raise AttributeError('peaks must be picked.') + raise AttributeError("peaks must be picked.") if self.ranges is None: - raise AttributeError('ranges must be specified.') - print('deconvoluting {}'.format(self.id)) - list_parameters = [self.data, self._grouped_index_peaklist, self._index_ranges, frac_gauss, method] - self._deconvoluted_peaks = numpy.array([j for i in Fid._deconv_datum(list_parameters) for j in i]) - print('deconvolution completed') - + raise AttributeError("ranges must be specified.") + print("deconvoluting {}".format(self.id)) + list_parameters = [ + self.data, + self._grouped_index_peaklist, + self._index_ranges, + frac_gauss, + method, + ] + self._deconvoluted_peaks = numpy.array( + [j for i in Fid._deconv_datum(list_parameters) for j in i] + ) + print("deconvolution completed") def plot_ppm(self, **kwargs): """ @@ -1161,7 +1349,7 @@ :keyword lower_ppm: lower spectral bound in ppm - :keyword lw: linewidth of plot + :keyword lw: linewidth of plot :keyword colour: colour of the plot """ def plot_deconv(self, **kwargs): """ @@ -1178,7 +1366,7 @@ :keyword lower_ppm: lower spectral bound in ppm - :keyword lw: linewidth of plot + :keyword lw: linewidth of plot :keyword colour: colour of the plot @@ -1187,14 +1375,15 @@ :keyword residual_colour: colour of the residual signal after subtracting deconvoluted peaks """ if not len(self._deconvoluted_peaks): - raise AttributeError('deconvolution not yet performed') + raise AttributeError("deconvolution not yet performed") plt = Plot() plt._plot_deconv(self, **kwargs) setattr(self, plt.id, plt) pyplot.show() - + + class FidArray(Base): - ''' + """ This object collects several :class:`~nmrpy.data_objects.Fid` objects into an array, and it contains all the processing methods necessary for bulk @@ -1207,9 +1396,37 @@ class FidArray(Base): :class:`~nmrpy.data_objects.FidArray` with a unique ID of the form 'fidXX', where 'XX' is an increasing integer. - ''' + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + _now = str(datetime.now()) + self.data_model = self.lib.NMRpy( + datetime_created=_now, + datetime_modified=_now, + ) + del _now + + @property + def data_model(self): + return self.__data_model + + @data_model.setter + def data_model(self, data_model: DataModel): + if not isinstance(data_model, DataModel): + raise AttributeError( + f"Parameter `data_model` has to be of type `sdRDM.DataModel`, got {type(data_model)} instead." 
+ ) + self.__data_model = data_model + self.__data_model.datetime_modified = str(datetime.now()) + + @data_model.deleter + def data_model(self): + del self.__data_model + print("The current data model has been deleted.") + def __str__(self): - return 'FidArray of {} FID(s)'.format(len(self.data)) + return "FidArray of {} FID(s)".format(len(self.data)) def get_fid(self, id): """ @@ -1222,20 +1438,28 @@ def get_fid(self, id): try: return getattr(self, id) except AttributeError: - print('{} does not exist.'.format(id)) + print("{} does not exist.".format(id)) def get_fids(self): """ Return a list of all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray`. """ - fids = [self.__dict__[id] for id in sorted(self.__dict__) if isinstance(self.__dict__[id], Fid)] + fids = [ + self.__dict__[id] + for id in sorted(self.__dict__) + if isinstance(self.__dict__[id], Fid) + ] return fids def _get_plots(self): """ Return a list of all :class:`~nmrpy.plotting.Plot` objects owned by this :class:`~nmrpy.data_objects.FidArray`. """ - plots = [self.__dict__[id] for id in sorted(self.__dict__) if isinstance(self.__dict__[id], Plot)] + plots = [ + self.__dict__[id] + for id in sorted(self.__dict__) + if isinstance(self.__dict__[id], Plot) + ] return plots def _del_plots(self): @@ -1251,7 +1475,8 @@ def _get_widgets(self): Return a list of all widget objects (peak selection, etc.) owned by this :class:`~nmrpy.data_objects.FidArray`. """ widgets = [ - id for id in sorted(self.__dict__) + id + for id in sorted(self.__dict__) if isinstance(self.__dict__[id], Phaser) or isinstance(self.__dict__[id], RangeCalibrator) or isinstance(self.__dict__[id], DataPeakRangeSelector) @@ -1286,7 +1511,7 @@ def t(self): t = None if nfids > 0: try: - t = self._params['acqtime'] + t = self._params["acqtime"] except: t = numpy.arange(len(self.get_fids())) return t @@ -1323,7 +1548,7 @@ def add_fid(self, fid): if isinstance(fid, Fid): setattr(self, fid.id, fid) else: - raise AttributeError('FidArray requires Fid object.') + raise AttributeError("FidArray requires Fid object.") def del_fid(self, fid_id): """ @@ -1336,19 +1561,19 @@ def del_fid(self, fid_id): fids = [f.id for f in self.get_fids()] idx = fids.index(fid_id) delattr(self, fid_id) - if hasattr(self, '_params') and self._params is not None: - at = list(self._params['acqtime']) + if hasattr(self, "_params") and self._params is not None: + at = list(self._params["acqtime"]) at.pop(idx) - self._params['acqtime'] = at + self._params["acqtime"] = at else: - raise AttributeError('{} is not an FID object.'.format(fid_id)) + raise AttributeError("{} is not an FID object.".format(fid_id)) else: - raise AttributeError('FID {} does not exist.'.format(fid_id)) + raise AttributeError("FID {} does not exist.".format(fid_id)) def add_fids(self, fids): """ Add a list of :class:`~nmrpy.data_objects.Fid` objects to this :class:`~nmrpy.data_objects.FidArray`. - + :arg fids: a list of :class:`~nmrpy.data_objects.Fid` instances """ if FidArray._is_iter(fids): @@ -1357,7 +1582,7 @@ def add_fids(self, fids): for fid_index in range(num_fids): try: fid = fids[fid_index] - id_str = 'fid{0:0'+zero_fill+'d}' + id_str = "fid{0:0" + zero_fill + "d}" fid.id = id_str.format(fid_index) self.add_fid(fid) except AttributeError as e: @@ -1367,50 +1592,50 @@ def add_fids(self, fids): def from_data(cls, data): """ Instantiate a new :class:`~nmrpy.data_objects.FidArray` object from a 2D data set of spectral arrays. 
- - :arg data: a 2D data array + + :arg data: a 2D data array """ if not cls._is_iter_of_iters(data): - raise TypeError('data must be an iterable of iterables.') + raise TypeError("data must be an iterable of iterables.") fid_array = cls() fids = [] for fid_index, datum in zip(range(len(data)), data): - fid_id = 'fid%i'%fid_index + fid_id = "fid%i" % fid_index fid = Fid(id=fid_id, data=datum) fids.append(fid) fid_array.add_fids(fids) return fid_array @classmethod - def from_path(cls, fid_path='.', file_format=None, arrayset=None): + def from_path(cls, fid_path=".", file_format=None, arrayset=None): """ Instantiate a new :class:`~nmrpy.data_objects.FidArray` object from a .fid directory. :keyword fid_path: filepath to .fid directory :keyword file_format: 'varian' or 'bruker', usually unnecessary - - :keyword arrayset: (int) array set for interleaved spectra, - user is prompted if not specified + + :keyword arrayset: (int) array set for interleaved spectra, + user is prompted if not specified """ if not file_format: try: - with open(fid_path, 'rb') as f: + with open(fid_path, "rb") as f: return pickle.load(f) except: - print('Not NMRPy data file.') + print("Not NMRPy data file.") importer = Importer(fid_path=fid_path) importer.import_fid(arrayset=arrayset) - elif file_format == 'varian': + elif file_format == "varian": importer = VarianImporter(fid_path=fid_path) importer.import_fid() - elif file_format == 'bruker': + elif file_format == "bruker": importer = BrukerImporter(fid_path=fid_path) importer.import_fid(arrayset=arrayset) - elif file_format == 'nmrpy': - with open(fid_path, 'rb') as f: + elif file_format == "nmrpy": + with open(fid_path, "rb") as f: return pickle.load(f) - + if cls._is_iter(importer.data): fid_array = cls.from_data(importer.data) fid_array._file_format = importer._file_format @@ -1420,19 +1645,19 @@ def from_path(cls, fid_path='.', file_format=None, arrayset=None): fid._file_format = fid_array._file_format fid.fid_path = fid_array.fid_path fid._procpar = fid_array._procpar - return fid_array + return fid_array else: - raise IOError('Data could not be imported.') + raise IOError("Data could not be imported.") def zf_fids(self): - """ + """ Zero-fill all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray` """ for fid in self.get_fids(): fid.zf() def emhz_fids(self, lb=5.0): - """ + """ Apply line-broadening (apodisation) to all :class:`nmrpy.~data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray` :keyword lb: degree of line-broadening in Hz. @@ -1441,7 +1666,7 @@ def emhz_fids(self, lb=5.0): fid.emhz(lb=lb) def ft_fids(self, mp=True, cpus=None): - """ + """ Fourier-transform all FIDs. :keyword mp: parallelise over multiple processors, significantly reducing computation time @@ -1454,14 +1679,14 @@ def ft_fids(self, mp=True, cpus=None): ft_data = self._generic_mp(Fid._ft, list_params, cpus) for fid, datum in zip(fids, ft_data): fid.data = datum - fid._flags['ft'] = True - else: + fid._flags["ft"] = True + else: for fid in self.get_fids(): fid.ft() - print('Fourier-transformation completed') + print("Fourier-transformation completed") def real_fids(self): - """ + """ Discard imaginary component of FID data sets. """ @@ -1469,16 +1694,16 @@ def real_fids(self): fid.real() def norm_fids(self): - """ + """ Normalise FIDs by maximum data value in :attr:`~nmrpy.data_objects.FidArray.data`. 
""" dmax = self.data.max() for fid in self.get_fids(): - fid.data = fid.data/dmax + fid.data = fid.data / dmax - def phase_correct_fids(self, method='leastsq', mp=True, cpus=None): - """ + def phase_correct_fids(self, method="leastsq", mp=True, cpus=None): + """ Apply automatic phase-correction to all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray` :keyword method: see :meth:`~nmrpy.data_objects.Fid.phase_correct` @@ -1487,20 +1712,24 @@ def phase_correct_fids(self, method='leastsq', mp=True, cpus=None): :keyword cpus: defines number of CPUs to utilise if 'mp' is set to True """ - if mp: + if mp: fids = self.get_fids() if not all(fid.data.dtype in self._complex_dtypes for fid in fids): - raise TypeError('Only complex data can be phase-corrected.') - if not all(fid._flags['ft'] for fid in fids): - raise ValueError('Only Fourier-transformed data can be phase-corrected.') + raise TypeError("Only complex data can be phase-corrected.") + if not all(fid._flags["ft"] for fid in fids): + raise ValueError( + "Only Fourier-transformed data can be phase-corrected." + ) list_params = [[fid.data, method] for fid in fids] - phased_data = self._generic_mp(Fid._phase_correct, list_params, cpus) + phased_data = self._generic_mp( + Fid._phase_correct, list_params, cpus + ) for fid, datum in zip(fids, phased_data): fid.data = datum else: for fid in self.get_fids(): fid.phase_correct(method=method) - print('phase-correction completed') + print("phase-correction completed") def baseliner_fids(self): """ @@ -1512,17 +1741,18 @@ def baseliner_fids(self): :meth:`~nmrpy.data_objects.Fid.baseline_correction`). """ - plot_label = \ -''' + plot_label = """ Drag Right - select range Ctrl+Right - delete range Ctrl+Alt+Right - assign -''' - plot_title = 'Select data for baseline-correction' - self._baseliner_widget = FidArrayRangeSelector(self, title=plot_title, label=plot_label, voff=0.01) - +""" + plot_title = "Select data for baseline-correction" + self._baseliner_widget = FidArrayRangeSelector( + self, title=plot_title, label=plot_label, voff=0.01 + ) + def baseline_correct_fids(self, deg=2): - """ + """ Apply baseline-correction to all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray` :keyword deg: degree of the baseline polynomial (see :meth:`~nmrpy.data_objects.Fid.baseline_correct`) @@ -1531,8 +1761,12 @@ def baseline_correct_fids(self, deg=2): try: fid.baseline_correct(deg=deg) except: - print('failed for {}. Perhaps first run baseliner_fids()'.format(fid.id)) - print('baseline-correction completed') + print( + "failed for {}. 
Perhaps first run baseliner_fids()".format( + fid.id + ) + ) + print("baseline-correction completed") @property def _data_traces(self): @@ -1540,7 +1774,7 @@ def _data_traces(self): @_data_traces.setter def _data_traces(self, data_traces): - self.__data_traces = data_traces + self.__data_traces = data_traces @property def _index_traces(self): @@ -1548,7 +1782,7 @@ def _index_traces(self): @_index_traces.setter def _index_traces(self, index_traces): - self.__index_traces = index_traces + self.__index_traces = index_traces @property def _trace_mask(self): @@ -1556,7 +1790,7 @@ def _trace_mask(self): @_trace_mask.setter def _trace_mask(self, trace_mask): - self.__trace_mask = trace_mask + self.__trace_mask = trace_mask @property def _trace_mean_ppm(self): @@ -1564,8 +1798,8 @@ def _trace_mean_ppm(self): @_trace_mean_ppm.setter def _trace_mean_ppm(self, trace_mean_ppm): - trace_mean_ppm - self.__trace_mean_ppm = trace_mean_ppm + trace_mean_ppm + self.__trace_mean_ppm = trace_mean_ppm @property def integral_traces(self): @@ -1577,10 +1811,12 @@ def integral_traces(self): @integral_traces.setter def integral_traces(self, integral_traces): - self._integral_traces = integral_traces + self._integral_traces = integral_traces - def deconv_fids(self, mp=True, cpus=None, method='leastsq', frac_gauss=0.0): - """ + def deconv_fids( + self, mp=True, cpus=None, method="leastsq", frac_gauss=0.0 + ): + """ Apply deconvolution to all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray`, using the :attr:`~nmrpy.data_objects.Fid.peaks` and :attr:`~nmrpy.data_objects.Fid.ranges` attribute of each respective :class:`~nmrpy.data_objects.Fid`. :keyword method: see :meth:`~nmrpy.data_objects.Fid.phase_correct` @@ -1589,18 +1825,33 @@ def deconv_fids(self, mp=True, cpus=None, method='leastsq', frac_gauss=0.0): :keyword cpus: defines number of CPUs to utilise if 'mp' is set to True, default is n-1 cores """ - if mp: + if mp: fids = self.get_fids() - if not all(fid._flags['ft'] for fid in fids): - raise ValueError('Only Fourier-transformed data can be deconvoluted.') - list_params = [[fid.data, fid._grouped_index_peaklist, fid._index_ranges, frac_gauss, method] for fid in fids] - deconv_datum = self._generic_mp(Fid._deconv_datum, list_params, cpus) + if not all(fid._flags["ft"] for fid in fids): + raise ValueError( + "Only Fourier-transformed data can be deconvoluted." 
+ ) + list_params = [ + [ + fid.data, + fid._grouped_index_peaklist, + fid._index_ranges, + frac_gauss, + method, + ] + for fid in fids + ] + deconv_datum = self._generic_mp( + Fid._deconv_datum, list_params, cpus + ) for fid, datum in zip(fids, deconv_datum): - fid._deconvoluted_peaks = numpy.array([j for i in datum for j in i]) + fid._deconvoluted_peaks = numpy.array( + [j for i in datum for j in i] + ) else: for fid in self.get_fids(): fid.deconv(frac_gauss=frac_gauss) - print('deconvolution completed') + print("deconvolution completed") def get_masked_integrals(self): """ @@ -1610,16 +1861,15 @@ def get_masked_integrals(self): try: ints = [list(i) for i in self.deconvoluted_integrals] for i in self._trace_mask: - ints_current = numpy.zeros_like(i, dtype='f8') + ints_current = numpy.zeros_like(i, dtype="f8") for j in range(len(i)): if i[j] != -1: ints_current[j] = ints[j].pop(0) result.append(ints_current) except AttributeError: - print('peakpicker_traces() or deconv_fids() probably not yet run.') + print("peakpicker_traces() or deconv_fids() probably not yet run.") return result - def ps_fids(self, p0=0.0, p1=0.0): """ Apply manual phase-correction to all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray` @@ -1629,19 +1879,18 @@ def ps_fids(self, p0=0.0, p1=0.0): :keyword p1: First order phase in degrees """ for fid in self.get_fids(): - fid.ps(p0=p0, p1=p1) + fid.ps(p0=p0, p1=p1) @staticmethod def _generic_mp(fcn, iterable, cpus): if cpus is None: - cpus = cpu_count()-1 + cpus = cpu_count() - 1 proc_pool = Pool(cpus) result = proc_pool.map(fcn, iterable) proc_pool.close() proc_pool.join() return result - def plot_array(self, **kwargs): """ Plot :attr:`~nmrpy.data_objects.FidArray.data`. @@ -1709,43 +1958,44 @@ def plot_deconv_array(self, **kwargs): """ plt = Plot() - plt._plot_deconv_array(self.get_fids(), - **kwargs) + plt._plot_deconv_array(self.get_fids(), **kwargs) setattr(self, plt.id, plt) - - def calibrate(self, fid_number=None, assign_only_to_index=False, - voff=0.02): + def calibrate( + self, fid_number=None, assign_only_to_index=False, voff=0.02 + ): """ - Instantiate a GUI widget to select a peak and calibrate - spectra in a :class:`~nmrpy.data_objects.FidArray`. - Left-clicking selects a peak. The user is then prompted to enter + Instantiate a GUI widget to select a peak and calibrate + spectra in a :class:`~nmrpy.data_objects.FidArray`. + Left-clicking selects a peak. The user is then prompted to enter the PPM value of that peak for calibration; this will be applied to all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray`. See also :meth:`~nmrpy.data_objects.Fid.calibrate`. - + :keyword fid_number: list or number, index of :class:`~nmrpy.data_objects.Fid` to use for calibration. If None, the whole data array is plotted. :keyword assign_only_to_index: if True, assigns calibration only to :class:`~nmrpy.data_objects.Fid` objects indexed by fid_number; if False, assigns to all. 
:keyword voff: vertical offset for spectra """ - plot_label = \ -''' + plot_label = """ Left - select peak -''' - self._calibrate_widget = RangeCalibrator(self, - y_indices=fid_number, - aoti=assign_only_to_index, - voff=voff, - label=plot_label, - ) +""" + self._calibrate_widget = RangeCalibrator( + self, + y_indices=fid_number, + aoti=assign_only_to_index, + voff=voff, + label=plot_label, + ) - def peakpicker(self, fid_number=None, assign_only_to_index=True, voff=0.02): + def peakpicker( + self, fid_number=None, assign_only_to_index=True, voff=0.02 + ): """ - Instantiate peak-picker widget for + Instantiate peak-picker widget for :attr:`~nmrpy.data_objects.Fid.data`, and apply selected :attr:`~nmrpy.data_objects.Fid.peaks` and :attr:`~nmrpy.data_objects.Fid.ranges` to all :class:`~nmrpy.data_objects.Fid` @@ -1759,23 +2009,22 @@ def peakpicker(self, fid_number=None, assign_only_to_index=True, voff=0.02): :keyword voff: vertical offset for spectra """ - plot_label = \ -''' + plot_label = """ Left - select peak Ctrl+Left - delete nearest peak Drag Right - select range Ctrl+Right - delete range Ctrl+Alt+Right - assign -''' - self._peakpicker_widget = DataPeakRangeSelector(self, - y_indices=fid_number, - aoti=assign_only_to_index, - voff=voff, - label=plot_label) +""" + self._peakpicker_widget = DataPeakRangeSelector( + self, + y_indices=fid_number, + aoti=assign_only_to_index, + voff=voff, + label=plot_label, + ) - def peakpicker_traces(self, - voff=0.02, - lw=1): + def peakpicker_traces(self, voff=0.02, lw=1): """ Instantiates a widget to pick peaks and ranges employing a polygon shape (or 'trace'). This is useful for picking peaks that are subject to drift and peaks @@ -1787,22 +2036,21 @@ def peakpicker_traces(self, """ if self.data is None: - raise AttributeError('No FIDs.') - plot_label = \ -''' + raise AttributeError("No FIDs.") + plot_label = """ Left - add trace point Right - finalize trace Ctrl+Left - delete nearest trace Drag Right - select range Ctrl+Right - delete range Ctrl+Alt+Right - assign -''' +""" self._peakpicker_widget = DataTraceRangeSelector( self, voff=voff, lw=lw, label=plot_label, - ) + ) def clear_peaks(self): """ @@ -1824,17 +2072,17 @@ def _generate_trace_mask(self, traces): ppm = [numpy.round(numpy.mean(i[0]), 2) for i in traces] self._trace_mean_ppm = ppm tt = [i[1] for i in traces] - ln = len(self.data) + ln = len(self.data) filled_tt = [] for i in tt: rng = numpy.arange(ln) if len(i) < ln: - rng[~(~(rng<min(i))*~(rng>max(i)))] = -1 + rng[~(~(rng < min(i)) * ~(rng > max(i)))] = -1 filled_tt.append(rng) filled_tt = numpy.array(filled_tt) return filled_tt - def _set_all_peaks_ranges_from_traces_and_spans(self, traces, spans): + def _set_all_peaks_ranges_from_traces_and_spans(self, traces, spans): traces = [dict(zip(i[1], i[0])) for i in traces] fids = self.get_fids() fids_i = range(len(self.data)) @@ -1846,15 +2094,14 @@ def _set_all_peaks_ranges_from_traces_and_spans(self, traces, spans): for rng in spans: if peak >= min(rng) and peak <= max(rng): peaks.append(peak) - fids[i].peaks = peaks + fids[i].peaks = peaks ranges = [] - for rng in spans: - if any((peaks>min(rng))*(peaks<max(rng))): + for rng in spans: + if any((peaks > min(rng)) * (peaks < max(rng))): ranges.append(rng) if ranges == []: ranges = None - fids[i].ranges = ranges - + fids[i].ranges = ranges def _get_all_summed_peakshapes(self): """ Summed peakshapes of all FIDs. """ peaks = [] for fid in self.get_fids(): - #x = numpy.arange(len(self.get_fids()[0].data)) + # x =
numpy.arange(len(self.get_fids()[0].data)) peaks.append(Fid._f_pks(fid._deconvoluted_peaks, x)) return peaks @@ -1873,7 +2120,7 @@ def _get_all_list_peakshapes(self): """ peaks = [] for fid in self.get_fids(): - #x = numpy.arange(len(self.get_fids()[0].data)) + # x = numpy.arange(len(self.get_fids()[0].data)) x = numpy.arange(len(self.get_fids()[0].data)) peaks.append(Fid._f_pks_list(fid._deconvoluted_peaks, x)) return peaks @@ -1890,7 +2137,7 @@ def _get_truncated_peak_shapes_for_plotting(self): pk_y = [] pk_x = [] for pk in ps: - pk_ind = pk > 0.1*pk.max() + pk_ind = pk > 0.1 * pk.max() pk_x.append(ppm[pk_ind]) pk_y.append(pk[pk_ind]) peakshapes_short_x.append(pk_x) @@ -1910,24 +2157,25 @@ def select_integral_traces(self, voff=0.02, lw=1): :keyword lw: linewidth of plot (1) """ if self.data is None: - raise AttributeError('No FIDs.') - if (self.deconvoluted_integrals==None).any(): - raise AttributeError('No integrals.') + raise AttributeError("No FIDs.") + if (self.deconvoluted_integrals == None).any(): + raise AttributeError("No integrals.") peakshapes = self._get_all_summed_peakshapes() - #pk_x, pk_y = self._get_truncated_peak_shapes_for_plotting() - plot_label = \ -''' + # pk_x, pk_y = self._get_truncated_peak_shapes_for_plotting() + plot_label = """ Left - add trace point Right - finalize trace Ctrl+Left - delete nearest trace Ctrl+Alt+Right - assign -''' - self._select_trace_widget = DataTraceSelector(self, - extra_data=peakshapes, - extra_data_colour='b', - voff=voff, +""" + self._select_trace_widget = DataTraceSelector( + self, + extra_data=peakshapes, + extra_data_colour="b", + voff=voff, label=plot_label, - lw=lw) + lw=lw, + ) def get_integrals_from_traces(self): """ @@ -1935,13 +2183,17 @@ :class:`~nmrpy.data_objects.Fid` objects calculated from trace dictionary :attr:`~nmrpy.data_objects.FidArray.integral_traces`. """ - if self.deconvoluted_integrals is None or \ - None in self.deconvoluted_integrals: - raise AttributeError('No integrals.') - if not hasattr(self, '_integral_traces'): - raise AttributeError('No integral traces. First run select_integral_traces().') + if ( + self.deconvoluted_integrals is None + or None in self.deconvoluted_integrals + ): + raise AttributeError("No integrals.") + if not hasattr(self, "_integral_traces"): + raise AttributeError( + "No integral traces. First run select_integral_traces()." + ) integrals_set = {} - decon_set = self.deconvoluted_integrals + decon_set = self.deconvoluted_integrals for i, tr in self.integral_traces.items(): tr_keys = numpy.array([fid for fid in tr.keys()]) tr_vals = numpy.array([val for val in tr.values()]) @@ -1949,9 +2201,16 @@ tr_keys = tr_keys[tr_sort] tr_vals = tr_vals[tr_sort] integrals = decon_set[tr_keys, tr_vals] - integrals_set[i] = integrals + integrals_set[i] = integrals return integrals_set + def assign_integrals(self, integrals_set: list) -> dict: + print("~~~ Method under construction ~~~") + widget_list = [] + for i, j in enumerate(integrals_set): + widget_list.append((i, list(j))) + return SelectMultiple(options=widget_list, description="Integrals:") + def save_to_file(self, filename=None, overwrite=False): """ Save :class:`~nmrpy.data_objects.FidArray` object to file, including all objects owned.
@@ -1963,13 +2222,13 @@ """ if filename is None: basename = os.path.split(os.path.splitext(self.fid_path)[0])[-1] - filename = basename+'.nmrpy' + filename = basename + ".nmrpy" if not isinstance(filename, str): - raise TypeError('filename must be a string.') - if filename[-6:] != '.nmrpy': - filename += '.nmrpy' + raise TypeError("filename must be a string.") + if filename[-6:] != ".nmrpy": + filename += ".nmrpy" if os.path.isfile(filename) and not overwrite: - print('File '+filename+' exists, set overwrite=True to force.') + print("File " + filename + " exists, set overwrite=True to force.") return 1 # delete all matplotlib plots to reduce file size self._del_plots() @@ -1979,11 +2238,52 @@ def save_to_file(self, filename=None, overwrite=False): self._del_widgets() for fid in self.get_fids(): fid._del_widgets() - with open(filename, 'wb') as f: + with open(filename, "wb") as f: pickle.dump(self, f) - -class Importer(Base): + def save_data(self, file_format: str, filename=None, overwrite=False): + print("~~~ Method under construction ~~~") + # nmrML export is not implemented separately yet; both formats in the + # branch below currently produce an EnzymeML document. + if file_format.lower() in ("enzymeml", "nmrml"): + # model = self.data_model.convert_to( + # template=Path(__file__).parent.parent / "links/enzymeml.toml" + # ) + enzymeml = DataModel.from_git( + url="https://github.com/EnzymeML/enzymeml-specifications.git", + tag="markdown-parser-refactor", + ) + doc = enzymeml.EnzymeMLDocument( + name=( + self.data_model.experiment.name + if hasattr(self.data_model.experiment, "name") + else "NMR experiment" + ), + created=self.data_model.datetime_created, + modified=self.data_model.datetime_modified, + ) + model = doc.xml() + elif file_format.lower() == "xml": + model = self.data_model.xml() + elif file_format.lower() == "json": + model = self.data_model.json() + elif file_format.lower() == "yaml": + model = self.data_model.yaml() + elif file_format.lower() == "hdf5": + model = self.data_model.hdf5() + else: + raise AttributeError( + f"Parameter `file_format` expected to be one of `enzymeml`; `nmrml`; `xml`; `json`; `yaml`; `hdf5`, got {file_format} instead." + ) + if not filename: + basename = os.path.split(os.path.splitext(self.fid_path)[0])[-1] + filename = basename + "." + file_format.lower() + if os.path.isfile(filename) and not overwrite: + print("File " + filename + " exists, set overwrite=True to force.") + return 1 + with open(filename, "w") as f: + f.write(model) + + +class Importer(Base): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.data = None @@ -2002,17 +2302,16 @@ def data(self, data): elif Importer._is_iter(data): self.__data = numpy.array([data]) else: - raise TypeError('data must be iterable.') + raise TypeError("data must be iterable.") else: - raise TypeError('data must be complex.') - + raise TypeError("data must be complex.") def import_fid(self, arrayset=None): """ This will first attempt to import Bruker data. Failing that, Varian.
""" try: - print('Attempting Bruker') + print("Attempting Bruker") brukerimporter = BrukerImporter(fid_path=self.fid_path) brukerimporter.import_fid(arrayset=arrayset) self.data = brukerimporter.data @@ -2020,45 +2319,50 @@ def import_fid(self, arrayset=None): self._file_format = brukerimporter._file_format return except (FileNotFoundError, OSError): - print('fid_path does not specify a valid .fid directory.') - return + print("fid_path does not specify a valid .fid directory.") + return except (TypeError, IndexError): - print('probably not Bruker data') - try: - print('Attempting Varian') + print("probably not Bruker data") + try: + print("Attempting Varian") varianimporter = VarianImporter(fid_path=self.fid_path) varianimporter.import_fid() self._procpar = varianimporter._procpar - self.data = varianimporter.data + self.data = varianimporter.data self._file_format = varianimporter._file_format return except TypeError: - print('probably not Varian data') + print("probably not Varian data") -class VarianImporter(Importer): +class VarianImporter(Importer): def import_fid(self): try: procpar, data = nmrglue.varian.read(self.fid_path) - self.data = data + self.data = data self._procpar = procpar - self._file_format = 'varian' + self._file_format = "varian" except FileNotFoundError: - print('fid_path does not specify a valid .fid directory.') + print("fid_path does not specify a valid .fid directory.") except OSError: - print('fid_path does not specify a valid .fid directory.') - -class BrukerImporter(Importer): + print("fid_path does not specify a valid .fid directory.") + +class BrukerImporter(Importer): def import_fid(self, arrayset=None): try: - dirs = [int(i) for i in os.listdir(self.fid_path) if \ - os.path.isdir(self.fid_path+os.path.sep+i)] + dirs = [ + int(i) + for i in os.listdir(self.fid_path) + if os.path.isdir(self.fid_path + os.path.sep + i) + ] dirs.sort() dirs = [str(i) for i in dirs] alldata = [] for d in dirs: - procpar, data = nmrglue.bruker.read(self.fid_path+os.path.sep+d) + procpar, data = nmrglue.bruker.read( + self.fid_path + os.path.sep + d + ) alldata.append((procpar, data)) self.alldata = alldata incr = 1 @@ -2070,47 +2374,60 @@ def import_fid(self, arrayset=None): incr += 1 if incr > 1: if arrayset == None: - print('Total of '+str(incr)+' alternating FidArrays found.') - arrayset = input('Which one to import? ') + print( + "Total of " + + str(incr) + + " alternating FidArrays found." + ) + arrayset = input("Which one to import? ") arrayset = int(arrayset) else: arrayset = arrayset if arrayset < 1 or arrayset > incr: - raise ValueError('Select a value between 1 and ' - + str(incr) + '.') + raise ValueError( + "Select a value between 1 and " + str(incr) + "." 
+ ) else: arrayset = 1 self.incr = incr - procpar = alldata[arrayset-1][0] - data = numpy.vstack([d[1] for d in alldata[(arrayset-1)::incr]]) + procpar = alldata[arrayset - 1][0] + data = numpy.vstack( + [d[1] for d in alldata[(arrayset - 1) :: incr]] + ) self.data = data self._procpar = procpar - self._file_format = 'bruker' - self.data = nmrglue.bruker.remove_digital_filter(procpar, self.data) - self._procpar['tdelta'], self._procpar['tcum'],\ - self._procpar['tsingle'] = self._get_time_delta() - self._procpar['arraylength'] = self.data.shape[0] - self._procpar['arrayset'] = arrayset + self._file_format = "bruker" + self.data = nmrglue.bruker.remove_digital_filter( + procpar, self.data + ) + ( + self._procpar["tdelta"], + self._procpar["tcum"], + self._procpar["tsingle"], + ) = self._get_time_delta() + self._procpar["arraylength"] = self.data.shape[0] + self._procpar["arrayset"] = arrayset except FileNotFoundError: - print('fid_path does not specify a valid .fid directory.') + print("fid_path does not specify a valid .fid directory.") except OSError: - print('fid_path does not specify a valid .fid directory.') - + print("fid_path does not specify a valid .fid directory.") + def _get_time_delta(self): td = 0.0 tcum = [] tsingle = [] for i in range(self.incr): - pp = self.alldata[i][0]['acqus'] - sw_hz = pp['SW_h'] - at = pp['TD']/(2*sw_hz) - d1 = pp['D'][1] - nt = pp['NS'] - tot = (at+d1)*nt/60. # convert to mins + pp = self.alldata[i][0]["acqus"] + sw_hz = pp["SW_h"] + at = pp["TD"] / (2 * sw_hz) + d1 = pp["D"][1] + nt = pp["NS"] + tot = (at + d1) * nt / 60.0 # convert to mins td += tot tcum.append(td) tsingle.append(tot) return (td, numpy.array(tcum), numpy.array(tsingle)) -if __name__ == '__main__': + +if __name__ == "__main__": pass diff --git a/nmrpy/datamodel/__init__.py b/nmrpy/datamodel/__init__.py new file mode 100644 index 0000000..d866ab2 --- /dev/null +++ b/nmrpy/datamodel/__init__.py @@ -0,0 +1,3 @@ + +__URL__ = "" +__COMMIT__ = "" diff --git a/nmrpy/datamodel/core/__init__.py b/nmrpy/datamodel/core/__init__.py new file mode 100644 index 0000000..bf9ee94 --- /dev/null +++ b/nmrpy/datamodel/core/__init__.py @@ -0,0 +1,33 @@ +from .nmrpy import NMRpy +from .experiment import Experiment +from .fid import FID +from .parameters import Parameters +from .fidarray import FIDArray +from .citation import Citation +from .person import Person +from .publication import Publication +from .cv import CV +from .term import Term +from .fileformats import FileFormats +from .subjects import Subjects +from .publicationtypes import PublicationTypes +from .identifiertypes import IdentifierTypes + +__doc__ = "" + +__all__ = [ + "NMRpy", + "Experiment", + "FID", + "Parameters", + "FIDArray", + "Citation", + "Person", + "Publication", + "CV", + "Term", + "FileFormats", + "Subjects", + "PublicationTypes", + "IdentifierTypes", +] diff --git a/nmrpy/datamodel/core/citation.py b/nmrpy/datamodel/core/citation.py new file mode 100644 index 0000000..dc21126 --- /dev/null +++ b/nmrpy/datamodel/core/citation.py @@ -0,0 +1,225 @@ +import sdRDM + +from typing import List, Optional +from pydantic import Field +from sdRDM.base.listplus import ListPlus +from sdRDM.base.utils import forge_signature, IDGenerator + +from typing import Any +from pydantic import AnyUrl + +from .term import Term +from .identifiertypes import IdentifierTypes +from .person import Person +from .publication import Publication +from .subjects import Subjects +from .publicationtypes import PublicationTypes + + +@forge_signature +class 
Citation(sdRDM.DataModel): + + """Container for various types of metadata primarily used in the publication and citation of the dataset.""" + + id: str = Field( + description="Unique identifier of the given object.", + default_factory=IDGenerator("citationINDEX"), + xml="@id", + ) + + title: Optional[str] = Field( + default=None, + description="Title the dataset should have when published.", + ) + + doi: Optional[AnyUrl] = Field( + default=None, + description="DOI pointing to the published dataset", + ) + + description: Optional[str] = Field( + default=None, + description="Description the dataset should have when published.", + ) + + authors: List[Person] = Field( + description="List of authors for this dataset.", + default_factory=ListPlus, + multiple=True, + ) + + subjects: List[Subjects] = Field( + description="List of subjects this dataset belongs to.", + default_factory=ListPlus, + multiple=True, + ) + + keywords: List[Term] = Field( + description="List of CV-based keywords describing the dataset.", + default_factory=ListPlus, + multiple=True, + ) + + topics: List[Term] = Field( + description="List of CV-based topics the dataset addresses.", + default_factory=ListPlus, + multiple=True, + ) + + related_publications: List[Publication] = Field( + description="List of publications relating to this dataset.", + default_factory=ListPlus, + multiple=True, + ) + + notes: Optional[str] = Field( + default=None, + description="Additional notes about the dataset.", + ) + + funding: List[str] = Field( + description="Funding information for this dataset.", + default_factory=ListPlus, + multiple=True, + ) + + license: Optional[str] = Field( + default="CC BY 4.0", + description="License information for this dataset. Defaults to `CC BY 4.0`.", + ) + + def add_to_authors( + self, + last_name: str, + first_name: str, + middle_names: List[str] = ListPlus(), + affiliation: Optional[str] = None, + email: Optional[str] = None, + identifier_type: Optional[IdentifierTypes] = None, + identifier_value: Optional[str] = None, + id: Optional[str] = None, + ) -> None: + """ + This method adds an object of type 'Person' to attribute authors + + Args: + id (str): Unique identifier of the 'Person' object. Defaults to 'None'. + last_name (): Family name of the person.. + first_name (): Given name of the person.. + middle_names (): List of middle names of the person.. Defaults to ListPlus() + affiliation (): Institution the Person belongs to.. Defaults to None + email (): Email address of the person.. Defaults to None + identifier_type (): Recognized identifier for the person.. Defaults to None + identifier_value (): Value of the identifier for the person.. Defaults to None + """ + + params = { + "last_name": last_name, + "first_name": first_name, + "middle_names": middle_names, + "affiliation": affiliation, + "email": email, + "identifier_type": identifier_type, + "identifier_value": identifier_value, + } + + if id is not None: + params["id"] = id + + self.authors.append(Person(**params)) + + def add_to_keywords( + self, + name: str, + accession: str, + term_cv_reference: Optional[str] = None, + value: Optional[Any] = None, + id: Optional[str] = None, + ) -> None: + """ + This method adds an object of type 'Term' to attribute keywords + + Args: + id (str): Unique identifier of the 'Term' object. Defaults to 'None'. + name (): The preferred name of the term associated with the given accession number.. + accession (): Accession number of the term in the controlled vocabulary.. 
+ term_cv_reference (): Reference to the `CV.id` of a controlled vocabulary that has been defined for this dataset.. Defaults to None + value (): Value of the term, if applicable.. Defaults to None + """ + + params = { + "name": name, + "accession": accession, + "term_cv_reference": term_cv_reference, + "value": value, + } + + if id is not None: + params["id"] = id + + self.keywords.append(Term(**params)) + + def add_to_topics( + self, + name: str, + accession: str, + term_cv_reference: Optional[str] = None, + value: Optional[Any] = None, + id: Optional[str] = None, + ) -> None: + """ + This method adds an object of type 'Term' to attribute topics + + Args: + id (str): Unique identifier of the 'Term' object. Defaults to 'None'. + name (): The preferred name of the term associated with the given accession number.. + accession (): Accession number of the term in the controlled vocabulary.. + term_cv_reference (): Reference to the `CV.id` of a controlled vocabulary that has been defined for this dataset.. Defaults to None + value (): Value of the term, if applicable.. Defaults to None + """ + + params = { + "name": name, + "accession": accession, + "term_cv_reference": term_cv_reference, + "value": value, + } + + if id is not None: + params["id"] = id + + self.topics.append(Term(**params)) + + def add_to_related_publications( + self, + type: PublicationTypes, + title: str, + authors: List[Person] = ListPlus(), + year: Optional[int] = None, + doi: Optional[AnyUrl] = None, + id: Optional[str] = None, + ) -> None: + """ + This method adds an object of type 'Publication' to attribute related_publications + + Args: + id (str): Unique identifier of the 'Publication' object. Defaults to 'None'. + type (): Nature of the publication.. + title (): Title of the publication.. + authors (): Authors of the publication.. Defaults to ListPlus() + year (): Year of publication.. Defaults to None + doi (): The DOI pointing to the publication.. 
Defaults to None + """ + + params = { + "type": type, + "title": title, + "authors": authors, + "year": year, + "doi": doi, + } + + if id is not None: + params["id"] = id + + self.related_publications.append(Publication(**params)) diff --git a/nmrpy/datamodel/core/cv.py b/nmrpy/datamodel/core/cv.py new file mode 100644 index 0000000..86735eb --- /dev/null +++ b/nmrpy/datamodel/core/cv.py @@ -0,0 +1,33 @@ +import sdRDM + +from pydantic import Field +from sdRDM.base.utils import forge_signature, IDGenerator + +from pydantic import AnyUrl + + +@forge_signature +class CV(sdRDM.DataModel): + + """Container for a controlled vocabulary (CV) used in this dataset.""" + + id: str = Field( + description="Unique identifier of the given object.", + default_factory=IDGenerator("cvINDEX"), + xml="@id", + ) + + vocabulary: str = Field( + ..., + description="Name of the CV used.", + ) + + version: str = Field( + ..., + description="Version of the CV used.", + ) + + url: AnyUrl = Field( + ..., + description="URL pointing to the CV used.", + ) diff --git a/nmrpy/datamodel/core/experiment.py b/nmrpy/datamodel/core/experiment.py new file mode 100644 index 0000000..1f78eca --- /dev/null +++ b/nmrpy/datamodel/core/experiment.py @@ -0,0 +1,65 @@ +import sdRDM + +from typing import List, Optional +from pydantic import Field +from sdRDM.base.listplus import ListPlus +from sdRDM.base.utils import forge_signature, IDGenerator + + +from .fidarray import FIDArray +from .fid import FID +from .parameters import Parameters + + +@forge_signature +class Experiment(sdRDM.DataModel): + + """Raw data -> mention only the intermediate steps + parameters of interest -> final result; peak list + range list; rapidly pulsed (if so, then +calibration factor) vs fully relaxed. +Also preparation of the EnzymeML document.""" + + id: str = Field( + description="Unique identifier of the given object.", + default_factory=IDGenerator("experimentINDEX"), + xml="@id", + ) + + name: str = Field( + ..., + description="A descriptive name for the overarching experiment.", + ) + + fid: List[FID] = Field( + description="A single NMR spectrum.", + default_factory=ListPlus, + multiple=True, + ) + + fid_array: Optional[FIDArray] = Field( + default=None, + description="Multiple NMR spectra to be processed together.", + ) + + def add_to_fid( + self, + data: List[float] = ListPlus(), + parameters: Optional[Parameters] = None, + id: Optional[str] = None, + ) -> None: + """ + This method adds an object of type 'FID' to attribute fid + + Args: + id (str): Unique identifier of the 'FID' object. Defaults to 'None'. + data (): Spectral data from numpy array.. Defaults to ListPlus() + parameters (): Contains commonly-used NMR parameters..
Defaults to None + """ + + params = { + "data": data, + "parameters": parameters, + } + + if id is not None: + params["id"] = id + + self.fid.append(FID(**params)) diff --git a/nmrpy/datamodel/core/fid.py b/nmrpy/datamodel/core/fid.py new file mode 100644 index 0000000..143b833 --- /dev/null +++ b/nmrpy/datamodel/core/fid.py @@ -0,0 +1,32 @@ +import sdRDM + +from typing import List, Optional +from pydantic import Field +from sdRDM.base.listplus import ListPlus +from sdRDM.base.utils import forge_signature, IDGenerator + + +from .parameters import Parameters + + +@forge_signature +class FID(sdRDM.DataModel): + + """Container for a single NMR spectrum.""" + + id: str = Field( + description="Unique identifier of the given object.", + default_factory=IDGenerator("fidINDEX"), + xml="@id", + ) + + data: List[float] = Field( + description="Spectral data from numpy array.", + default_factory=ListPlus, + multiple=True, + ) + + parameters: Optional[Parameters] = Field( + default=None, + description="Contains commonly-used NMR parameters.", + ) diff --git a/nmrpy/datamodel/core/fidarray.py b/nmrpy/datamodel/core/fidarray.py new file mode 100644 index 0000000..48b4b49 --- /dev/null +++ b/nmrpy/datamodel/core/fidarray.py @@ -0,0 +1,24 @@ +import sdRDM + +from typing import List +from pydantic import Field +from sdRDM.base.listplus import ListPlus +from sdRDM.base.utils import forge_signature, IDGenerator + + +@forge_signature +class FIDArray(sdRDM.DataModel): + + """Container for processing of multiple spectra. Must reference the respective `FID` objects by `id`. {Add reference back.}""" + + id: str = Field( + description="Unique identifier of the given object.", + default_factory=IDGenerator("fidarrayINDEX"), + xml="@id", + ) + + fids: List[str] = Field( + description="List of `FID.id` belonging to this array.", + multiple=True, + default_factory=ListPlus, + ) diff --git a/nmrpy/datamodel/core/fileformats.py b/nmrpy/datamodel/core/fileformats.py new file mode 100644 index 0000000..bf80f78 --- /dev/null +++ b/nmrpy/datamodel/core/fileformats.py @@ -0,0 +1,7 @@ +from enum import Enum + + +class FileFormats(Enum): + VARIAN = "varian" + BRUKER = "bruker" + NONE = None diff --git a/nmrpy/datamodel/core/identifiertypes.py b/nmrpy/datamodel/core/identifiertypes.py new file mode 100644 index 0000000..f4bf8fe --- /dev/null +++ b/nmrpy/datamodel/core/identifiertypes.py @@ -0,0 +1,5 @@ +from enum import Enum + + +class IdentifierTypes(Enum): + ORCID = "ORCID" diff --git a/nmrpy/datamodel/core/nmrpy.py b/nmrpy/datamodel/core/nmrpy.py new file mode 100644 index 0000000..4b68b75 --- /dev/null +++ b/nmrpy/datamodel/core/nmrpy.py @@ -0,0 +1,45 @@ +import sdRDM + +from typing import Optional +from pydantic import Field +from sdRDM.base.utils import forge_signature, IDGenerator + +from datetime import datetime + +from .citation import Citation +from .experiment import Experiment + + +@forge_signature +class NMRpy(sdRDM.DataModel): + + """Root element of the NMRpy data model.""" + + id: str = Field( + description="Unique identifier of the given object.", + default_factory=IDGenerator("nmrpyINDEX"), + xml="@id", + ) + + datetime_created: datetime = Field( + ..., + description="Date and time this dataset has been created.", + ) + + datetime_modified: Optional[datetime] = Field( + default=None, + description="Date and time this dataset has last been modified.", + ) + + experiment: Optional[Experiment] = Field( + default=None, + description="List of experiments associated with this dataset.", + ) + + citation: 
Optional[Citation] = Field( + default=None, + description=( + "Relevant information regarding the publication and citation of this" + " dataset." + ), + ) diff --git a/nmrpy/datamodel/core/parameters.py b/nmrpy/datamodel/core/parameters.py new file mode 100644 index 0000000..2062e25 --- /dev/null +++ b/nmrpy/datamodel/core/parameters.py @@ -0,0 +1,70 @@ +import sdRDM + +from typing import List, Optional +from pydantic import Field +from sdRDM.base.listplus import ListPlus +from sdRDM.base.utils import forge_signature, IDGenerator + + +@forge_signature +class Parameters(sdRDM.DataModel): + + """Container for relevant NMR parameters.""" + + id: str = Field( + description="Unique identifier of the given object.", + default_factory=IDGenerator("parametersINDEX"), + xml="@id", + ) + + acquisition_time: Optional[float] = Field( + default=None, + description="at", + ) + + relaxation_time: Optional[float] = Field( + default=None, + description="d1", + ) + + repetition_time: Optional[float] = Field( + default=None, + description="rt = at + d1", + ) + + number_of_transients: List[float] = Field( + description="nt", + default_factory=ListPlus, + multiple=True, + ) + + acquisition_times_array: List[float] = Field( + description="acqtime = [nt, 2nt, ..., rt x nt]", + default_factory=ListPlus, + multiple=True, + ) + + spectral_width_ppm: Optional[float] = Field( + default=None, + description="sw", + ) + + spectral_width_hz: Optional[float] = Field( + default=None, + description="sw_hz", + ) + + spectrometer_frequency: Optional[float] = Field( + default=None, + description="sfrq", + ) + + reference_frequency: Optional[float] = Field( + default=None, + description="reffrq", + ) + + spectral_width_left: Optional[float] = Field( + default=None, + description="sw_left", + ) diff --git a/nmrpy/datamodel/core/person.py b/nmrpy/datamodel/core/person.py new file mode 100644 index 0000000..dcfbaef --- /dev/null +++ b/nmrpy/datamodel/core/person.py @@ -0,0 +1,57 @@ +import sdRDM + +from typing import List, Optional +from pydantic import Field +from sdRDM.base.listplus import ListPlus +from sdRDM.base.utils import forge_signature, IDGenerator + + +from .identifiertypes import IdentifierTypes + + +@forge_signature +class Person(sdRDM.DataModel): + + """Container for information regarding a person that worked on an experiment.""" + + id: str = Field( + description="Unique identifier of the given object.", + default_factory=IDGenerator("personINDEX"), + xml="@id", + ) + + last_name: str = Field( + ..., + description="Family name of the person.", + ) + + first_name: str = Field( + ..., + description="Given name of the person.", + ) + + middle_names: List[str] = Field( + description="List of middle names of the person.", + default_factory=ListPlus, + multiple=True, + ) + + affiliation: Optional[str] = Field( + default=None, + description="Institution the Person belongs to.", + ) + + email: Optional[str] = Field( + default=None, + description="Email address of the person.", + ) + + identifier_type: Optional[IdentifierTypes] = Field( + default=None, + description="Recognized identifier for the person.", + ) + + identifier_value: Optional[str] = Field( + default=None, + description="Value of the identifier for the person.", + ) diff --git a/nmrpy/datamodel/core/publication.py b/nmrpy/datamodel/core/publication.py new file mode 100644 index 0000000..42025c6 --- /dev/null +++ b/nmrpy/datamodel/core/publication.py @@ -0,0 +1,90 @@ +import sdRDM + +from typing import List, Optional +from pydantic import Field +from 
sdRDM.base.listplus import ListPlus +from sdRDM.base.utils import forge_signature, IDGenerator + +from pydantic import AnyUrl + +from .identifiertypes import IdentifierTypes +from .person import Person +from .publicationtypes import PublicationTypes + + +@forge_signature +class Publication(sdRDM.DataModel): + + """Container for citation information of a relevant publication.""" + + id: str = Field( + description="Unique identifier of the given object.", + default_factory=IDGenerator("publicationINDEX"), + xml="@id", + ) + + type: PublicationTypes = Field( + ..., + description="Nature of the publication.", + ) + + title: str = Field( + ..., + description="Title of the publication.", + ) + + authors: List[Person] = Field( + description="Authors of the publication.", + multiple=True, + default_factory=ListPlus, + ) + + year: Optional[int] = Field( + default=None, + description="Year of publication.", + ) + + doi: Optional[AnyUrl] = Field( + default=None, + description="The DOI pointing to the publication.", + ) + + def add_to_authors( + self, + last_name: str, + first_name: str, + middle_names: List[str] = ListPlus(), + affiliation: Optional[str] = None, + email: Optional[str] = None, + identifier_type: Optional[IdentifierTypes] = None, + identifier_value: Optional[str] = None, + id: Optional[str] = None, + ) -> None: + """ + This method adds an object of type 'Person' to attribute authors + + Args: + id (str): Unique identifier of the 'Person' object. Defaults to 'None'. + last_name (): Family name of the person.. + first_name (): Given name of the person.. + middle_names (): List of middle names of the person.. Defaults to ListPlus() + affiliation (): Institution the Person belongs to.. Defaults to None + email (): Email address of the person.. Defaults to None + identifier_type (): Recognized identifier for the person.. Defaults to None + identifier_value (): Value of the identifier for the person.. 
Defaults to None + """ + + params = { + "last_name": last_name, + "first_name": first_name, + "middle_names": middle_names, + "affiliation": affiliation, + "email": email, + "identifier_type": identifier_type, + "identifier_value": identifier_value, + } + + if id is not None: + params["id"] = id + + self.authors.append(Person(**params)) diff --git a/nmrpy/datamodel/core/publicationtypes.py b/nmrpy/datamodel/core/publicationtypes.py new file mode 100644 index 0000000..f5974ef --- /dev/null +++ b/nmrpy/datamodel/core/publicationtypes.py @@ -0,0 +1,5 @@ +from enum import Enum + + +class PublicationTypes(Enum): + ARTICLE = "Journal article" diff --git a/nmrpy/datamodel/core/subjects.py b/nmrpy/datamodel/core/subjects.py new file mode 100644 index 0000000..d343f01 --- /dev/null +++ b/nmrpy/datamodel/core/subjects.py @@ -0,0 +1,8 @@ +from enum import Enum + + +class Subjects(Enum): + BIOLOGY = "Biology" + CHEMISTRY = "Chemistry" + IT = "Computer and Information Science" + PHYSICS = "Physics" diff --git a/nmrpy/datamodel/core/term.py b/nmrpy/datamodel/core/term.py new file mode 100644 index 0000000..6a48293 --- /dev/null +++ b/nmrpy/datamodel/core/term.py @@ -0,0 +1,44 @@ +import sdRDM + +from typing import Optional +from pydantic import Field +from sdRDM.base.utils import forge_signature, IDGenerator + +from typing import Any + + +@forge_signature +class Term(sdRDM.DataModel): + + """Container for a term from a controlled vocabulary. {Add reference back to term_cv_reference.}""" + + id: str = Field( + description="Unique identifier of the given object.", + default_factory=IDGenerator("termINDEX"), + xml="@id", + ) + + name: str = Field( + ..., + description=( + "The preferred name of the term associated with the given accession number." + ), + ) + + accession: str = Field( + ..., + description="Accession number of the term in the controlled vocabulary.", + ) + + term_cv_reference: Optional[str] = Field( + default=None, + description=( + "Reference to the `CV.id` of a controlled vocabulary that has been defined"
+ ), + ) + + value: Optional[Any] = Field( + default=None, + description="Value of the term, if applicable.", + ) diff --git a/nmrpy/datamodel/schemes/datamodel_schema.md b/nmrpy/datamodel/schemes/datamodel_schema.md new file mode 100644 index 0000000..527eb3a --- /dev/null +++ b/nmrpy/datamodel/schemes/datamodel_schema.md @@ -0,0 +1,121 @@ +```mermaid +classDiagram + NMRpy *-- Experiment + NMRpy *-- Citation + Experiment *-- FID + Experiment *-- FIDArray + FID *-- Parameters + Citation *-- Subjects + Citation *-- Person + Citation *-- Publication + Citation *-- Term + Person *-- IdentifierTypes + Publication *-- PublicationTypes + Publication *-- Person + + class NMRpy { + +datetime datetime_created* + +datetime datetime_modified + +Experiment experiment + +Citation citation + } + + class Experiment { + +string name* + +FID[0..*] fid + +FIDArray fid_array + } + + class FID { + +float[0..*] data + +Parameters parameters + } + + class Parameters { + +float acquisition_time + +float relaxation_time + +float repetition_time + +float[0..*] number_of_transients + +float[0..*] acquisition_times_array + +float spectral_width_ppm + +float spectral_width_hz + +float spectrometer_frequency + +float reference_frequency + +float spectral_width_left + } + + class FIDArray { + +string[0..*] fids* + } + + class Citation { + +string title + +URL doi + +string description + +Person[0..*] authors + +Subjects[0..*] subjects + +Term[0..*] keywords + +Term[0..*] topics + +Publication[0..*] related_publications + +string notes + +string[0..*] funding + +string license + } + + class Person { + +string last_name* + +string first_name* + +string[0..*] middle_names + +string affiliation + +string email + +IdentifierTypes identifier_type + +string identifier_value + } + + class Publication { + +PublicationTypes type* + +string title* + +Person[0..*] authors* + +integer year + +URL doi + } + + class CV { + +string vocabulary* + +string version* + +URL url* + } + + class Term { + +string name* + +string accession* + +string term_cv_reference + +any value + } + + class FileFormats { + << Enumeration >> + +VARIAN + +BRUKER + +NONE + } + + class Subjects { + << Enumeration >> + +BIOLOGY + +CHEMISTRY + +IT + +PHYSICS + } + + class PublicationTypes { + << Enumeration >> + +ARTICLE + } + + class IdentifierTypes { + << Enumeration >> + +ORCID + } + +``` \ No newline at end of file diff --git a/specifications/nmrpy.md b/specifications/nmrpy.md new file mode 100644 index 0000000..dd45c34 --- /dev/null +++ b/specifications/nmrpy.md @@ -0,0 +1,279 @@ +# NMRpy data model + +Python object model specifications based on the [software-driven-rdm](https://github.com/JR-1991/software-driven-rdm) Python library. + + +## Core objects + + +### NMRpy + +Root element of the NMRpy data model. + +- __datetime_created__ + - Type: datetime + - Description: Date and time this dataset has been created. +- datetime_modified + - Type: datetime + - Description: Date and time this dataset has last been modified. +- experiment + - Type: [Experiment](#experiment) + - Description: List of experiments associated with this dataset. +- citation + - Type: [Citation](#citation) + - Description: Relevant information regarding the publication and citation of this dataset. 
+ + +### Experiment + +Raw data -> mention only the intermediate steps + parameters of interest -> final result; peak list + range list; rapidly pulsed (if so, then +calibration factor) vs fully relaxed. +Also preparation of the EnzymeML document. + +- __name__ + - Type: string + - Description: A descriptive name for the overarching experiment. +- fid + - Type: [FID](#fid) + - Description: A single NMR spectrum. + - Multiple: True +- fid_array + - Type: [FIDArray](#fidarray) + - Description: Multiple NMR spectra to be processed together. + + +### FID + +Container for a single NMR spectrum. + +- data + - Type: float + - Description: Spectral data from numpy array. + - Multiple: True +- parameters + - Type: [Parameters](#parameters) + - Description: Contains commonly-used NMR parameters. + + +### Parameters + +Container for relevant NMR parameters. + +- acquisition_time + - Type: float + - Description: at +- relaxation_time + - Type: float + - Description: d1 +- repetition_time + - Type: float + - Description: rt = at + d1 +- number_of_transients + - Type: float + - Description: nt + - Multiple: True +- acquisition_times_array + - Type: float + - Description: acqtime = [nt, 2nt, ..., rt x nt] + - Multiple: True +- spectral_width_ppm + - Type: float + - Description: sw +- spectral_width_hz + - Type: float + - Description: sw_hz +- spectrometer_frequency + - Type: float + - Description: sfrq +- reference_frequency + - Type: float + - Description: reffrq +- spectral_width_left + - Type: float + - Description: sw_left + + +### FIDArray + +Container for processing of multiple spectra. Must reference the respective `FID` objects by `id`. {Add reference back.} + +- __fids__ + - Type: string + - Description: List of `FID.id` belonging to this array. + - Multiple: True + + +### Citation + +Container for various types of metadata primarily used in the publication and citation of the dataset. + +- title + - Type: string + - Description: Title the dataset should have when published. +- doi + - Type: URL + - Description: DOI pointing to the published dataset +- description + - Type: string + - Description: Description the dataset should have when published. +- authors + - Type: [Person](#person) + - Description: List of authors for this dataset. + - Multiple: True +- subjects + - Type: [Subjects](#subjects) + - Description: List of subjects this dataset belongs to. + - Multiple: True +- keywords + - Type: [Term](#term) + - Description: List of CV-based keywords describing the dataset. + - Multiple: True +- topics + - Type: [Term](#term) + - Description: List of CV-based topics the dataset addresses. + - Multiple: True +- related_publications + - Type: [Publication](#publication) + - Description: List of publications relating to this dataset. + - Multiple: True +- notes + - Type: string + - Description: Additional notes about the dataset. +- funding + - Type: string + - Description: Funding information for this dataset. + - Multiple: True +- license + - Type: string + - Description: License information for this dataset. Defaults to `CC BY 4.0`. + - Default: CC BY 4.0 + + +### Person + +Container for information regarding a person that worked on an experiment. + +- __last_name__ + - Type: string + - Description: Family name of the person. +- __first_name__ + - Type: string + - Description: Given name of the person. +- middle_names + - Type: string + - Description: List of middle names of the person. + - Multiple: True +- affiliation + - Type: string + - Description: Institution the Person belongs to.
+- email + - Type: string + - Description: Email address of the person. +- identifier_type + - Type: [IdentifierTypes](#identifiertypes) + - Description: Recognized identifier for the person. +- identifier_value + - Type: string + - Description: Value of the identifier for the person. + + +### Publication + +Container for citation information of a relevant publication. + +- __type__ + - Type: [PublicationTypes](#publicationtypes) + - Description: Nature of the publication. +- __title__ + - Type: string + - Description: Title of the publication. +- __authors__ + - Type: [Person](#person) + - Description: Authors of the publication. + - Multiple: True +- year + - Type: integer + - Description: Year of publication. +- doi + - Type: URL + - Description: The DOI pointing to the publication. + + +## Utility objects + + +### CV + +Container for a controlled vocabulary (CV) used in this dataset. + +- __vocabulary__ + - Type: string + - Description: Name of the CV used. +- __version__ + - Type: string + - Description: Version of the CV used. +- __url__ + - Type: URL + - Description: URL pointing to the CV used. + + +### Term + +Container for a term from a controlled vocabulary. {Add reference back to term_cv_reference.} + +- __name__ + - Type: string + - Description: The preferred name of the term associated with the given accession number. +- __accession__ + - Type: string + - Description: Accession number of the term in the controlled vocabulary. +- term_cv_reference + - Type: string + - Description: Reference to the `CV.id` of a controlled vocabulary that has been defined for this dataset. +- value + - Type: any + - Description: Value of the term, if applicable. + + + +## Enumerations + + +### FileFormats + +Enumeration containing the file formats accepted by the NMRpy library. + +```python +VARIAN = "varian" +BRUKER = "bruker" +NONE = None +``` + + +### Subjects + +Enumeration containing common subjects (research fields) that implement NMR. + +```python +BIOLOGY = "Biology" +CHEMISTRY = "Chemistry" +IT = "Computer and Information Science" +PHYSICS = "Physics" +``` + + +### PublicationTypes + +Enumeration containing accepted types of publication. + +```python +ARTICLE = "Journal article" +``` + + +### IdentifierTypes + +Enumeration containing recognized identifiers for persons. + +```python +ORCID = "ORCID" +```
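As a closing orientation, the sketch below shows how the objects specified above might be wired together through the generated API. It is a minimal, hypothetical example and not part of the patch itself: the class, field, and `add_to_*` helper names are taken from this specification (pydantic models generated via `forge_signature`), while all concrete values are placeholders.

```python
from datetime import datetime

from nmrpy.datamodel.core import (
    NMRpy,
    Experiment,
    Parameters,
    Citation,
    Subjects,
    IdentifierTypes,
)

# datetime_created is the only required field of the root object.
dataset = NMRpy(datetime_created=datetime.now())

# Attach an experiment and a single FID with commonly-used parameters.
dataset.experiment = Experiment(name="Example experiment")  # placeholder name
dataset.experiment.add_to_fid(
    data=[0.0, 0.1, 0.2],  # placeholder spectral data, e.g. numpy_array.tolist()
    parameters=Parameters(acquisition_time=1.0, relaxation_time=1.0),
)

# Citation metadata; the license defaults to "CC BY 4.0" as specified above.
dataset.citation = Citation(
    title="Example NMR dataset",  # placeholder title
    subjects=[Subjects.BIOLOGY, Subjects.CHEMISTRY],
)
dataset.citation.add_to_authors(
    last_name="Doe",  # hypothetical person
    first_name="Jane",
    identifier_type=IdentifierTypes.ORCID,
    identifier_value="0000-0000-0000-0000",
)

dataset.datetime_modified = datetime.now()
```

From here, a populated model can be serialised via the sdRDM `DataModel` methods that `FidArray.save_data()` in `nmrpy/data_objects.py` above dispatches to (`xml()`, `json()`, `yaml()`).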