From e1686616aca44ea1ebdd70cf8e3c470cbdeffdfd Mon Sep 17 00:00:00 2001
From: Torsten Giess <torsten.giess@ibtb.uni-stuttgart.de>
Date: Tue, 26 Sep 2023 01:35:26 +0200
Subject: [PATCH] Update data model

---
 nmrpy/data_objects.py                       | 137 ++++++++++-------
 nmrpy/datamodel/core/__init__.py            |  16 ++
 nmrpy/datamodel/core/abstractspecies.py     |  71 +++++++++
 nmrpy/datamodel/core/citation.py            |  19 ++-
 nmrpy/datamodel/core/complexdatapoint.py    |  32 ++++
 nmrpy/datamodel/core/cv.py                  |   4 +-
 nmrpy/datamodel/core/datatypes.py           |  10 ++
 nmrpy/datamodel/core/experiment.py          |  32 ++--
 nmrpy/datamodel/core/fid.py                 |  81 +++++++++-
 nmrpy/datamodel/core/fidarray.py            |   7 +-
 nmrpy/datamodel/core/identity.py            |  51 +++++++
 nmrpy/datamodel/core/nmrpy.py               |  11 +-
 nmrpy/datamodel/core/parameters.py          |   3 +-
 nmrpy/datamodel/core/person.py              |   3 +-
 nmrpy/datamodel/core/processingsteps.py     |  81 ++++++++++
 nmrpy/datamodel/core/protein.py             |  57 +++++++
 nmrpy/datamodel/core/publication.py         |   7 +-
 nmrpy/datamodel/core/reactant.py            |  49 +++++++
 nmrpy/datamodel/core/sboterm.py             |  35 +++++
 nmrpy/datamodel/core/term.py                |   3 +-
 nmrpy/datamodel/core/vessel.py              |  52 +++++++
 nmrpy/datamodel/schemes/datamodel_schema.md | 155 +++++++++++++++++++-
 nmrpy/plotting.py                           |  76 ++++++----
 specifications/nmrpy.md                     |  99 ++++++++++++-
 24 files changed, 958 insertions(+), 133 deletions(-)
 create mode 100644 nmrpy/datamodel/core/abstractspecies.py
 create mode 100644 nmrpy/datamodel/core/complexdatapoint.py
 create mode 100644 nmrpy/datamodel/core/datatypes.py
 create mode 100644 nmrpy/datamodel/core/identity.py
 create mode 100644 nmrpy/datamodel/core/processingsteps.py
 create mode 100644 nmrpy/datamodel/core/protein.py
 create mode 100644 nmrpy/datamodel/core/reactant.py
 create mode 100644 nmrpy/datamodel/core/sboterm.py
 create mode 100644 nmrpy/datamodel/core/vessel.py

diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py
index ddf3c98..19993d1 100644
--- a/nmrpy/data_objects.py
+++ b/nmrpy/data_objects.py
@@ -12,6 +12,7 @@
 import pickle
 from ipywidgets import SelectMultiple
 from sdRDM import DataModel
+from nmrpy.datamodel.core import *
 
 
 class Base:
@@ -29,17 +30,17 @@ def __init__(self, *args, **kwargs):
         self._params = None
         self.fid_path = kwargs.get("fid_path", ".")
         self._file_format = None
-        # self.parameters_object = self.lib.Parameters()
 
-    @property
-    def lib(self):
-        try:
-            self.__lib
-        except:
-            self.__lib = DataModel.from_markdown(
-                path=Path(__file__).parent.parent / "specifications"
-            )
-        return self.__lib
+    # Probably not required anymore
+    # @property
+    # def lib(self):
+    #     try:
+    #         self.__lib
+    #     except:
+    #         self.__lib = DataModel.from_markdown(
+    #             path=Path(__file__).parent.parent / "specifications"
+    #         )
+    #     return self.__lib
 
     # @property
     # def parameters_object(self):
@@ -120,24 +121,6 @@ def _procpar(self, procpar):
         elif isinstance(procpar, dict):
             self.__procpar = procpar
             self._params = self._extract_procpar(procpar)
-            # self.parameters_object(
-            #     acquisition_time=self._params.get("at"),
-            #     relaxation_time=self._params.get("d1"),
-            #     repetition_time=self._params.get("rt"),
-            #     spectral_width_ppm=self._params.get("sw"),
-            #     spectral_width_hz=self._params.get("sw_hz"),
-            #     spectrometer_frequency=self._params.get("sfrq"),
-            #     reference_frequency=self._params.get("reffrq"),
-            #     spectral_width_left=self._params.get("sw_left"),
-            # )
-            # for _ in self._params.get("nt"):
-            #     if type(_) is not None:
-            #         self.fid_object.parameters.number_of_transients.append(_)
-            # for _ in self._params.get("acqtime"):
-            #     if type(_) is not None:
-            #         self.fid_object.parameters.acquisition_times_array.append(
-            #             _
-            #         )
         else:
             raise AttributeError("procpar must be a dictionary or None.")
 
@@ -256,7 +239,6 @@ class Fid(Base):
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.fid_object = self.lib.FID()
         self.data = kwargs.get("data", [])
         self.peaks = None
         self.ranges = None
@@ -265,6 +247,13 @@ def __init__(self, *args, **kwargs):
         self._flags = {
             "ft": False,
         }
+        self.fid_object = FID(
+            raw_data=[(str(datum)) for datum in self.data],
+            processed_data=[],
+            nmr_parameters=Parameters(),
+            processing_steps=ProcessingSteps(),
+            peak_identities=[],
+        )
 
     def __str__(self):
         return "FID: %s (%i data)" % (self.id, len(self.data))
@@ -278,6 +267,14 @@ def fid_object(self, fid_object):
         if isinstance(fid_object, DataModel):
             self.__fid_object = fid_object
 
+    @property
+    def processing_steps(self):  # read-only view of the steps recorded on the sdRDM FID object
+        return self.fid_object.processing_steps
+
+    @processing_steps.setter
+    def processing_steps(self, processing_steps):  # direct assignment is rejected on purpose
+        raise PermissionError("Forbidden!")
+
     @property
     def data(self):
         """
@@ -289,9 +286,6 @@ def data(self):
     def data(self, data):
         if Fid._is_valid_dataset(data):
             self.__data = numpy.array(data)
-            # for _ in self.__data:
-            #     if type(_) is not None:
-            #         self.fid_object.data.append(float(_))
 
     @property
     def _ppm(self):
@@ -354,18 +348,21 @@ def ranges(self, ranges):
     @property
     def identities(self):
         """
-        Assigned identities corresponding to the various ranges in :attr:`~nmrpy.data_objects.Fid.ranges`.
+        Assigned identities corresponding to the various peaks in :attr:`~nmrpy.data_objects.Fid.peaks`.
         """
-        return self._identitites
+        return self._identities
 
     @identities.setter
     def identities(self, identities):
+        if identities is None:
+            self._identities = None
+            return
         if identities is not None:
             # if not Fid._is_flat_iter(identities):
             #     raise AttributeError("identitites must be a flat iterable")
             if not all(isinstance(i, str) for i in identities):
                 raise AttributeError("identities must be strings")
-            self._identitites = numpy.array(identities)
+            self._identities = numpy.array(identities)
         else:
             self._identities = identities
 
@@ -602,8 +599,8 @@ def zf(self):
 
         """
         self.data = numpy.append(self.data, 0 * self.data)
-        for _ in self.data:
-            self.fid_object.data.append(float(_))
+        self.fid_object.processed_data = [str(datum) for datum in self.data]
+        self.fid_object.processing_steps.is_zero_filled = True
 
     def emhz(self, lb=5.0):
         """
@@ -622,16 +619,17 @@ def emhz(self, lb=5.0):
             )
             * self.data
         )
-        for _ in self.data:
-            self.fid_object.data.append(float(_))
+        self.fid_object.processed_data = [str(datum) for datum in self.data]
+        self.fid_object.processing_steps.is_apodised = True
+        self.fid_object.processing_steps.apodisation_frequency = lb
 
     def real(self):
         """
         Discard imaginary component of :attr:`~nmrpy.data_objects.Fid.data`.
         """
         self.data = numpy.real(self.data)
-        for _ in self.data:
-            self.fid_object.data.append(float(_))
+        self.fid_object.processed_data = [float(datum) for datum in self.data]
+        self.fid_object.processing_steps.is_only_real = True
 
     # GENERAL FUNCTIONS
     def ft(self):
@@ -649,9 +647,10 @@ def ft(self):
         if Fid._is_valid_dataset(self.data):
             list_params = (self.data, self._file_format)
             self.data = Fid._ft(list_params)
-            for _ in self.data:
-                self.fid_object.data.append(float(_))
             self._flags["ft"] = True
+            self.fid_object.processed_data = [str(datum) for datum in self.data]
+            self.fid_object.processing_steps.is_fourier_transformed = True  # kept in step with _flags["ft"]
+            self.fid_object.processing_steps.fourier_transform_type = "FFT"
 
     @classmethod
     def _ft(cls, list_params):
@@ -735,8 +734,8 @@ def phase_correct(self, method="leastsq"):
             )
         print("phasing: %s" % self.id)
         self.data = Fid._phase_correct((self.data, method))
-        for _ in self.data:
-            self.fid_object.data.append(float(_))
+        self.fid_object.processed_data = [str(datum) for datum in self.data]
+        self.fid_object.processing_steps.is_phased = True
 
     @classmethod
     def _phase_correct(cls, list_params):
@@ -809,8 +808,10 @@ def ps(self, p0=0.0, p1=0.0):
         size = len(self.data)
         ph = numpy.exp(1.0j * (p0 + (p1 * numpy.arange(size) / size)))
         self.data = ph * self.data
-        for _ in self.data:
-            self.fid_object.data.append(float(_))
+        self.fid_object.processed_data = [str(datum) for datum in self.data]
+        self.fid_object.processing_steps.is_phased = True
+        self.fid_object.processing_steps.zero_order_phase = p0
+        self.fid_object.processing_steps.first_order_phase = p1
 
     def phaser(self):
         """
@@ -876,8 +877,8 @@ def baseline_correct(self, deg=2):
         self._bl_poly = yp
         data_bl = data - yp
         self.data = numpy.array(data_bl)
-        for _ in self.data:
-            self.fid_object.data.append(float(_))
+        self.fid_object.processed_data = [float(datum) for datum in self.data]
+        self.fid_object.processing_steps.is_baseline_corrected = True
 
     def peakpick(self, thresh=0.1):
         """
@@ -1445,7 +1446,7 @@ class FidArray(Base):
 
     def __init__(self):
         _now = str(datetime.now())
-        self.data_model = self.lib.NMRpy(
+        self.data_model = NMRpy(
             datetime_created=_now,
             datetime_modified=_now,
         )
@@ -1738,6 +1739,10 @@ def ft_fids(self, mp=True, cpus=None):
             for fid, datum in zip(fids, ft_data):
                 fid.data = datum
                 fid._flags["ft"] = True
+                fid.fid_object.processed_data = [str(data) for data in datum]
+                fid.fid_object.processing_steps.is_fourier_transformed = True
+                fid.fid_object.processing_steps.fourier_transform_type = "FFT"
+
         else:
             for fid in self.get_fids():
                 fid.ft()
@@ -1759,6 +1764,11 @@ def norm_fids(self):
         dmax = self.data.max()
         for fid in self.get_fids():
             fid.data = fid.data / dmax
+            fid.fid_object.processed_data = [
+                float(datum) for datum in fid.data
+            ]
+            fid.fid_object.processing_steps.is_normalised = True
+            fid.fid_object.processing_steps.max_value = float(dmax)
 
     def phase_correct_fids(self, method="leastsq", mp=True, cpus=None):
         """
@@ -1784,6 +1794,8 @@ def phase_correct_fids(self, method="leastsq", mp=True, cpus=None):
             )
             for fid, datum in zip(fids, phased_data):
                 fid.data = datum
+                fid.fid_object.processed_data = [str(data) for data in datum]
+                fid.fid_object.processing_steps.is_phased = True
         else:
             for fid in self.get_fids():
                 fid.phase_correct(method=method)
@@ -1906,6 +1918,7 @@ def deconv_fids(
                 fid._deconvoluted_peaks = numpy.array(
                     [j for i in datum for j in i]
                 )
+                fid.fid_object.processing_steps.is_deconvoluted = True
         else:
             for fid in self.get_fids():
                 fid.deconv(frac_gauss=frac_gauss)
@@ -2299,25 +2312,35 @@ def save_to_file(self, filename=None, overwrite=False):
         with open(filename, "wb") as f:
             pickle.dump(self, f)
 
+    # TODO: Probably create one measurement object per FID
+    # and add them to the provided EnzymeML document.
+    # Open issue: how to obtain the species for IdentityAssigner -- via __init__()?
+    def add_to_enzymeml(self, enzymeml_document=None) -> None:
+        ...  # placeholder body: nothing is implemented yet
+
+    # TODO: Refactor save_data method
+    # possibly make saving to EnzymeML a get_measurements method
     def save_data(self, file_format: str, filename=None, overwrite=False):
         print("~~~ Method under contruction ~~~")
         if self.force_pyenzyme:
-            import pyenzyme as pe
-
+            try:
+                import pyenzyme as pe
+            except ImportError as err:  # only a failed import is expected here; let anything else surface
+                self.force_pyenzyme = False
+                raise ModuleNotFoundError(
+                    "PyEnzyme is not installed in your current environment. Use EnzymeML data model instead or install PyEnzyme."
+                ) from err
             enzymeml = pe.EnzymeMLDocument(
-                name=self.data_mode.experiment.name
+                name=self.data_model.experiment.name
                 if hasattr(self.data_model.experiment, "name")
                 else "NMR experiment"
             )
             ...
             return 1
         if file_format.lower() == ("enzymeml" or "nmrml"):
-            # model = self.data_model.convert_to(
-            #     template=Path(__file__).parent.parent / "links/enzymeml.toml"
-            # )
             enzymeml = DataModel.from_git(
                 url="https://github.com/EnzymeML/enzymeml-specifications.git",
-                tag="markdown-parser-refactor",
+                tag="linking-refactor",
             )
             doc = enzymeml.EnzymeMLDocument(
                 name=(
diff --git a/nmrpy/datamodel/core/__init__.py b/nmrpy/datamodel/core/__init__.py
index bf9ee94..8479b41 100644
--- a/nmrpy/datamodel/core/__init__.py
+++ b/nmrpy/datamodel/core/__init__.py
@@ -2,16 +2,24 @@
 from .experiment import Experiment
 from .fid import FID
 from .parameters import Parameters
+from .processingsteps import ProcessingSteps
+from .identity import Identity
 from .fidarray import FIDArray
 from .citation import Citation
 from .person import Person
 from .publication import Publication
 from .cv import CV
 from .term import Term
+from .vessel import Vessel
+from .abstractspecies import AbstractSpecies
+from .protein import Protein
+from .reactant import Reactant
 from .fileformats import FileFormats
 from .subjects import Subjects
 from .publicationtypes import PublicationTypes
 from .identifiertypes import IdentifierTypes
+from .sboterm import SBOTerm
+from .datatypes import DataTypes
 
 __doc__ = ""
 
@@ -20,14 +28,22 @@
     "Experiment",
     "FID",
     "Parameters",
+    "ProcessingSteps",
+    "Identity",
     "FIDArray",
     "Citation",
     "Person",
     "Publication",
     "CV",
     "Term",
+    "Vessel",
+    "AbstractSpecies",
+    "Protein",
+    "Reactant",
     "FileFormats",
     "Subjects",
     "PublicationTypes",
     "IdentifierTypes",
+    "SBOTerm",
+    "DataTypes",
 ]
diff --git a/nmrpy/datamodel/core/abstractspecies.py b/nmrpy/datamodel/core/abstractspecies.py
new file mode 100644
index 0000000..57e50eb
--- /dev/null
+++ b/nmrpy/datamodel/core/abstractspecies.py
@@ -0,0 +1,71 @@
+import sdRDM
+
+from typing import Optional, Union
+from pydantic import Field, validator
+from sdRDM.base.utils import forge_signature, IDGenerator
+
+from pydantic import StrictBool
+
+from .vessel import Vessel
+
+
+@forge_signature
+class AbstractSpecies(sdRDM.DataModel):
+    """This object is used to inherit basic attributes common to all species used in the data model."""
+
+    id: Optional[str] = Field(  # filled in by the IDGenerator factory when not supplied
+        description="Unique identifier of the given object.",
+        default_factory=IDGenerator("abstractspeciesINDEX"),
+        xml="@id",
+    )
+
+    name: str = Field(
+        ...,  # Ellipsis marks the field as required
+        description="None",  # NOTE(review): literal string "None" (also on the fields below) -- presumably the spec has no description; confirm
+    )
+
+    vessel_id: Union[Vessel, str] = Field(  # accepts a Vessel or an id string; the validator below keeps only the id
+        ...,
+        reference="Vessel.id",
+        description="None",
+    )
+
+    init_conc: Optional[float] = Field(
+        default=None,
+        description="None",
+    )
+
+    constant: StrictBool = Field(  # required; StrictBool does not coerce non-bool input
+        ...,
+        description="None",
+    )
+
+    unit: Optional[str] = Field(
+        default=None,
+        description="None",
+    )
+
+    uri: Optional[str] = Field(
+        default=None,
+        description="None",
+    )
+
+    creator_id: Optional[str] = Field(
+        default=None,
+        description="None",
+    )
+
+    @validator("vessel_id")
+    def get_vessel_id_reference(cls, value):
+        """Extracts the ID from a given object to create a reference"""
+
+        from .vessel import Vessel  # NOTE(review): Vessel is already imported at module level
+
+        if isinstance(value, Vessel):
+            return value.id  # store the reference, not the object
+        elif isinstance(value, str):
+            return value
+        else:
+            raise TypeError(
+                f"Expected types [Vessel, str] got '{type(value).__name__}' instead."
+            )
diff --git a/nmrpy/datamodel/core/citation.py b/nmrpy/datamodel/core/citation.py
index 799d198..c574cf3 100644
--- a/nmrpy/datamodel/core/citation.py
+++ b/nmrpy/datamodel/core/citation.py
@@ -5,23 +5,22 @@
 from sdRDM.base.listplus import ListPlus
 from sdRDM.base.utils import forge_signature, IDGenerator
 
-from pydantic import AnyUrl
 from typing import Any
+from pydantic import AnyUrl
 
-from .term import Term
-from .person import Person
-from .subjects import Subjects
 from .publication import Publication
+from .term import Term
 from .publicationtypes import PublicationTypes
+from .subjects import Subjects
+from .person import Person
 from .identifiertypes import IdentifierTypes
 
 
 @forge_signature
 class Citation(sdRDM.DataModel):
-
     """Container for various types of metadata primarily used in the publication and citation of the dataset."""
 
-    id: str = Field(
+    id: Optional[str] = Field(
         description="Unique identifier of the given object.",
         default_factory=IDGenerator("citationINDEX"),
         xml="@id",
@@ -128,6 +127,8 @@ def add_to_authors(
 
         self.authors.append(Person(**params))
 
+        return self.authors[-1]
+
     def add_to_keywords(
         self,
         name: str,
@@ -159,6 +160,8 @@ def add_to_keywords(
 
         self.keywords.append(Term(**params))
 
+        return self.keywords[-1]
+
     def add_to_topics(
         self,
         name: str,
@@ -190,6 +193,8 @@ def add_to_topics(
 
         self.topics.append(Term(**params))
 
+        return self.topics[-1]
+
     def add_to_related_publications(
         self,
         type: PublicationTypes,
@@ -223,3 +228,5 @@ def add_to_related_publications(
             params["id"] = id
 
         self.related_publications.append(Publication(**params))
+
+        return self.related_publications[-1]
diff --git a/nmrpy/datamodel/core/complexdatapoint.py b/nmrpy/datamodel/core/complexdatapoint.py
new file mode 100644
index 0000000..44ce1c2
--- /dev/null
+++ b/nmrpy/datamodel/core/complexdatapoint.py
@@ -0,0 +1,32 @@
+import sdRDM
+
+from typing import Optional
+from pydantic import Field
+from sdRDM.base.utils import forge_signature, IDGenerator
+
+
+@forge_signature
+class ComplexDataPoint(sdRDM.DataModel):  # NOTE(review): not imported by core/__init__.py in this patch, and FID.raw_data holds strings -- confirm this class is used
+    """Container for a complex number from the Free Induction Decay."""
+
+    id: Optional[str] = Field(  # filled in by the IDGenerator factory when not supplied
+        description="Unique identifier of the given object.",
+        default_factory=IDGenerator("complexdatapointINDEX"),
+        xml="@id",
+    )
+
+    real_part: Optional[float] = Field(  # optional; None when not set
+        default=None,
+        description=(
+            "Real part of the complex number. Equivalent to `z.real` with `z` being a"
+            " `complex` number in Python."
+        ),
+    )
+
+    imaginary_part: Optional[float] = Field(  # optional; None when not set
+        default=None,
+        description=(
+            "Imaginary part of the complex number. Equivalent to `z.imag` with `z`"
+            " being a `complex` number in Python."
+        ),
+    )
diff --git a/nmrpy/datamodel/core/cv.py b/nmrpy/datamodel/core/cv.py
index 86735eb..e7e070a 100644
--- a/nmrpy/datamodel/core/cv.py
+++ b/nmrpy/datamodel/core/cv.py
@@ -1,5 +1,6 @@
 import sdRDM
 
+from typing import Optional
 from pydantic import Field
 from sdRDM.base.utils import forge_signature, IDGenerator
 
@@ -8,10 +9,9 @@
 
 @forge_signature
 class CV(sdRDM.DataModel):
-
     """lorem ipsum"""
 
-    id: str = Field(
+    id: Optional[str] = Field(
         description="Unique identifier of the given object.",
         default_factory=IDGenerator("cvINDEX"),
         xml="@id",
diff --git a/nmrpy/datamodel/core/datatypes.py b/nmrpy/datamodel/core/datatypes.py
new file mode 100644
index 0000000..92b2754
--- /dev/null
+++ b/nmrpy/datamodel/core/datatypes.py
@@ -0,0 +1,10 @@
+from enum import Enum
+
+
+class DataTypes(Enum):  # string-valued members; presumably mirrors the EnzymeML data-type vocabulary -- confirm
+    CONCENTRATION = "conc"
+    ABSORPTION = "abs"
+    FEED = "feed"
+    BIOMASS = "biomass"
+    CONVERSION = "conversion"
+    PEAK_AREA = "peak-area"  # the only hyphenated value in this set
diff --git a/nmrpy/datamodel/core/experiment.py b/nmrpy/datamodel/core/experiment.py
index 3af5c83..793de75 100644
--- a/nmrpy/datamodel/core/experiment.py
+++ b/nmrpy/datamodel/core/experiment.py
@@ -1,23 +1,24 @@
 import sdRDM
 
-from typing import List, Optional
+from typing import Optional, Union, List
 from pydantic import Field
 from sdRDM.base.listplus import ListPlus
 from sdRDM.base.utils import forge_signature, IDGenerator
 
 
-from .fidarray import FIDArray
+from .processingsteps import ProcessingSteps
+from .identity import Identity
 from .parameters import Parameters
 from .fid import FID
+from .fidarray import FIDArray
 
 
 @forge_signature
 class Experiment(sdRDM.DataModel):
-
     """Rohdaten -> Zwischenschritte nur nennen + interessante Parameter -> Endergebnis; Peaklist + Rangelist; rapidly pulsed (if then +calibration factor) vs fully relaxed
     Also preparation of EnzymeML doc"""
 
-    id: str = Field(
+    id: Optional[str] = Field(
         description="Unique identifier of the given object.",
         default_factory=IDGenerator("experimentINDEX"),
         xml="@id",
@@ -41,8 +42,11 @@ class Experiment(sdRDM.DataModel):
 
     def add_to_fid(
         self,
-        data: List[float] = ListPlus(),
-        parameters: Optional[Parameters] = None,
+        raw_data: List[str] = ListPlus(),
+        processed_data: List[Union[str, float]] = ListPlus(),
+        nmr_parameters: Optional[Parameters] = None,
+        processing_steps: Optional[ProcessingSteps] = None,
+        peak_identities: List[Identity] = ListPlus(),
         id: Optional[str] = None,
     ) -> None:
         """
@@ -50,16 +54,24 @@ def add_to_fid(
 
         Args:
             id (str): Unique identifier of the 'FID' object. Defaults to 'None'.
-            data (): Spectral data from numpy array.. Defaults to ListPlus()
-            parameters (): Contains commonly-used NMR parameters.. Defaults to None
+            raw_data (): Complex spectral data from numpy array as string of format `{array.real}+{array.imag}j`.. Defaults to ListPlus()
+            processed_data (): Processed data array.. Defaults to ListPlus()
+            nmr_parameters (): Contains commonly-used NMR parameters.. Defaults to None
+            processing_steps (): Contains the processing steps performed, as well as the parameters used for them.. Defaults to None
+            peak_identities (): Container holding and mapping integrals resulting from peaks and their ranges to EnzymeML species.. Defaults to ListPlus()
         """
 
         params = {
-            "data": data,
-            "parameters": parameters,
+            "raw_data": raw_data,
+            "processed_data": processed_data,
+            "nmr_parameters": nmr_parameters,
+            "processing_steps": processing_steps,
+            "peak_identities": peak_identities,
         }
 
         if id is not None:
             params["id"] = id
 
         self.fid.append(FID(**params))
+
+        return self.fid[-1]
diff --git a/nmrpy/datamodel/core/fid.py b/nmrpy/datamodel/core/fid.py
index 143b833..c92eb0b 100644
--- a/nmrpy/datamodel/core/fid.py
+++ b/nmrpy/datamodel/core/fid.py
@@ -1,32 +1,99 @@
 import sdRDM
 
-from typing import List, Optional
+from typing import Optional, Union, List
 from pydantic import Field
 from sdRDM.base.listplus import ListPlus
 from sdRDM.base.utils import forge_signature, IDGenerator
 
+from pydantic.types import FrozenSet
 
+from .processingsteps import ProcessingSteps
+from .abstractspecies import AbstractSpecies
+from .protein import Protein
+from .identity import Identity
 from .parameters import Parameters
+from .reactant import Reactant
 
 
 @forge_signature
 class FID(sdRDM.DataModel):
-
     """Container for a single NMR spectrum."""
 
-    id: str = Field(
+    id: Optional[str] = Field(
         description="Unique identifier of the given object.",
         default_factory=IDGenerator("fidINDEX"),
         xml="@id",
     )
 
-    data: List[float] = Field(
-        description="Spectral data from numpy array.",
+    raw_data: List[str] = Field(
+        description=(
+            "Complex spectral data from numpy array as string of format"
+            " `{array.real}+{array.imag}j`."
+        ),
+        default_factory=ListPlus,
+        multiple=True,
+    )
+
+    processed_data: List[Union[str, float]] = Field(
+        description="Processed data array.",
         default_factory=ListPlus,
         multiple=True,
     )
 
-    parameters: Optional[Parameters] = Field(
-        default=None,
+    nmr_parameters: Optional[Parameters] = Field(
+        default=Parameters(),
         description="Contains commonly-used NMR parameters.",
     )
+
+    processing_steps: Optional[ProcessingSteps] = Field(
+        default=ProcessingSteps(),
+        description=(
+            "Contains the processing steps performed, as well as the parameters used"
+            " for them."
+        ),
+    )
+
+    peak_identities: List[Identity] = Field(
+        description=(
+            "Container holding and mapping integrals resulting from peaks and their"
+            " ranges to EnzymeML species."
+        ),
+        default_factory=ListPlus,
+        multiple=True,
+    )
+
+    def add_to_peak_identities(
+        self,
+        name: str,
+        enzymeml_species: Union[AbstractSpecies, Protein, Reactant, None] = None,
+        associated_peaks: List[float] = ListPlus(),  # NOTE(review): default instance is created once and shared between calls -- confirm it is never mutated
+        associated_ranges: List[FrozenSet] = ListPlus(),
+        associated_integrals: List[float] = ListPlus(),
+        id: Optional[str] = None,
+    ) -> Identity:
+        """
+        Adds an object of type 'Identity' to attribute peak_identities and returns the newly added object.
+
+        Args:
+            id (str): Unique identifier of the 'Identity' object. Defaults to 'None'.
+            name (): Descriptive name for the species.
+            enzymeml_species (): A species object from an EnzymeML document. Defaults to None
+            associated_peaks (): Peaks belonging to the given species. Defaults to ListPlus()
+            associated_ranges (): Sets of ranges belonging to the given peaks. Defaults to ListPlus()
+            associated_integrals (): Integrals resulting from the given peaks and ranges of a species. Defaults to ListPlus()
+        """
+
+        params = {
+            "name": name,
+            "enzymeml_species": enzymeml_species,
+            "associated_peaks": associated_peaks,
+            "associated_ranges": associated_ranges,
+            "associated_integrals": associated_integrals,
+        }
+
+        if id is not None:  # omit the key so the model's default_factory can generate an id
+            params["id"] = id
+
+        self.peak_identities.append(Identity(**params))
+
+        return self.peak_identities[-1]
diff --git a/nmrpy/datamodel/core/fidarray.py b/nmrpy/datamodel/core/fidarray.py
index 48b4b49..5fb1400 100644
--- a/nmrpy/datamodel/core/fidarray.py
+++ b/nmrpy/datamodel/core/fidarray.py
@@ -1,6 +1,6 @@
 import sdRDM
 
-from typing import List
+from typing import List, Optional
 from pydantic import Field
 from sdRDM.base.listplus import ListPlus
 from sdRDM.base.utils import forge_signature, IDGenerator
@@ -8,10 +8,9 @@
 
 @forge_signature
 class FIDArray(sdRDM.DataModel):
+    """Container for processing of multiple spectra. Must reference the respective `FID` objects by `id`. {Add reference back. Setup time for experiment, Default 0.5}"""
 
-    """Container for processing of multiple spectra. Must reference the respective `FID` objects by `id`. {Add reference back.}"""
-
-    id: str = Field(
+    id: Optional[str] = Field(
         description="Unique identifier of the given object.",
         default_factory=IDGenerator("fidarrayINDEX"),
         xml="@id",
diff --git a/nmrpy/datamodel/core/identity.py b/nmrpy/datamodel/core/identity.py
new file mode 100644
index 0000000..a3860a9
--- /dev/null
+++ b/nmrpy/datamodel/core/identity.py
@@ -0,0 +1,51 @@
+import sdRDM
+
+from typing import Optional, Union, List
+from pydantic import Field
+from sdRDM.base.listplus import ListPlus
+from sdRDM.base.utils import forge_signature, IDGenerator
+
+from pydantic.types import FrozenSet
+
+from .abstractspecies import AbstractSpecies
+from .protein import Protein
+from .reactant import Reactant
+
+
+@forge_signature
+class Identity(sdRDM.DataModel):
+    """Container mapping one or more peaks to the respective species."""
+
+    id: Optional[str] = Field(  # filled in by the IDGenerator factory when not supplied
+        description="Unique identifier of the given object.",
+        default_factory=IDGenerator("identityINDEX"),
+        xml="@id",
+    )
+
+    name: str = Field(
+        ...,  # Ellipsis marks the field as required
+        description="Descriptive name for the species",
+    )
+
+    enzymeml_species: Union[AbstractSpecies, Protein, Reactant, None] = Field(
+        default=None,  # NOTE(review): if pydantic v1 is in use, Union members are tried left to right, so AbstractSpecies may shadow Protein/Reactant -- confirm ordering
+        description="A species object from an EnzymeML document.",
+    )
+
+    associated_peaks: List[float] = Field(  # starts as an empty ListPlus
+        description="Peaks belonging to the given species",
+        default_factory=ListPlus,
+        multiple=True,
+    )
+
+    associated_ranges: List[FrozenSet] = Field(  # NOTE(review): a frozenset is unordered and merges equal bounds -- confirm it suits (left, right) ranges
+        description="Sets of ranges belonging to the given peaks",
+        default_factory=ListPlus,
+        multiple=True,
+    )
+
+    associated_integrals: List[float] = Field(  # starts as an empty ListPlus
+        description="Integrals resulting from the given peaks and ranges of a species",
+        default_factory=ListPlus,
+        multiple=True,
+    )
diff --git a/nmrpy/datamodel/core/nmrpy.py b/nmrpy/datamodel/core/nmrpy.py
index 4b68b75..0971fef 100644
--- a/nmrpy/datamodel/core/nmrpy.py
+++ b/nmrpy/datamodel/core/nmrpy.py
@@ -4,7 +4,7 @@
 from pydantic import Field
 from sdRDM.base.utils import forge_signature, IDGenerator
 
-from datetime import datetime
+from datetime import datetime as Datetime
 
 from .citation import Citation
 from .experiment import Experiment
@@ -12,21 +12,20 @@
 
 @forge_signature
 class NMRpy(sdRDM.DataModel):
-
     """Root element of the NMRpy data model."""
 
-    id: str = Field(
+    id: Optional[str] = Field(
         description="Unique identifier of the given object.",
         default_factory=IDGenerator("nmrpyINDEX"),
         xml="@id",
     )
 
-    datetime_created: datetime = Field(
+    datetime_created: Datetime = Field(
         ...,
         description="Date and time this dataset has been created.",
     )
 
-    datetime_modified: Optional[datetime] = Field(
+    datetime_modified: Optional[Datetime] = Field(
         default=None,
         description="Date and time this dataset has last been modified.",
     )
@@ -37,7 +36,7 @@ class NMRpy(sdRDM.DataModel):
     )
 
     citation: Optional[Citation] = Field(
-        default=None,
+        default=Citation(),
         description=(
             "Relevant information regarding the publication and citation of this"
             " dataset."
diff --git a/nmrpy/datamodel/core/parameters.py b/nmrpy/datamodel/core/parameters.py
index 2062e25..66f0c37 100644
--- a/nmrpy/datamodel/core/parameters.py
+++ b/nmrpy/datamodel/core/parameters.py
@@ -8,10 +8,9 @@
 
 @forge_signature
 class Parameters(sdRDM.DataModel):
-
     """Container for relevant NMR parameters."""
 
-    id: str = Field(
+    id: Optional[str] = Field(
         description="Unique identifier of the given object.",
         default_factory=IDGenerator("parametersINDEX"),
         xml="@id",
diff --git a/nmrpy/datamodel/core/person.py b/nmrpy/datamodel/core/person.py
index dcfbaef..c854aeb 100644
--- a/nmrpy/datamodel/core/person.py
+++ b/nmrpy/datamodel/core/person.py
@@ -11,10 +11,9 @@
 
 @forge_signature
 class Person(sdRDM.DataModel):
-
     """Container for information regarding a person that worked on an experiment."""
 
-    id: str = Field(
+    id: Optional[str] = Field(
         description="Unique identifier of the given object.",
         default_factory=IDGenerator("personINDEX"),
         xml="@id",
diff --git a/nmrpy/datamodel/core/processingsteps.py b/nmrpy/datamodel/core/processingsteps.py
new file mode 100644
index 0000000..e4c8830
--- /dev/null
+++ b/nmrpy/datamodel/core/processingsteps.py
@@ -0,0 +1,81 @@
+import sdRDM
+
+from typing import Optional
+from pydantic import Field
+from sdRDM.base.utils import forge_signature, IDGenerator
+
+
+@forge_signature
+class ProcessingSteps(sdRDM.DataModel):
+    """Container for the processing steps performed, as well as the parameters used for them."""
+
+    id: Optional[str] = Field(
+        description="Unique identifier of the given object.",
+        default_factory=IDGenerator("processingstepsINDEX"),
+        xml="@id",
+    )
+
+    is_apodised: Optional[bool] = Field(
+        default=None,  # NOTE(review): the other is_* flags default to False — confirm None is intended
+        description="Whether or not Apodisation (line-broadening) has been performed.",
+    )
+
+    apodisation_frequency: Optional[float] = Field(
+        default=None,
+        description="Degree of Apodisation (line-broadening) in Hz.",
+    )
+
+    is_zero_filled: Optional[bool] = Field(
+        default=False,
+        description="Whether or not Zero-filling has been performed.",
+    )
+
+    is_fourier_transformed: Optional[bool] = Field(
+        default=False,
+        description="Whether or not Fourier transform has been performed.",
+    )
+
+    fourier_transform_type: Optional[str] = Field(
+        default=None,
+        description="The type of Fourier transform used.",
+    )
+
+    is_phased: Optional[bool] = Field(
+        default=False,
+        description="Whether or not Phasing was performed.",
+    )
+
+    zero_order_phase: Optional[float] = Field(
+        default=None,
+        description="Zero-order phase used for Phasing.",
+    )
+
+    first_order_phase: Optional[float] = Field(
+        default=None,
+        description="First-order phase used for Phasing.",
+    )
+
+    is_only_real: Optional[bool] = Field(
+        default=False,
+        description="Whether or not the imaginary part has been discarded.",
+    )
+
+    is_normalised: Optional[bool] = Field(
+        default=False,
+        description="Whether or not Normalisation was performed.",
+    )
+
+    max_value: Optional[float] = Field(
+        default=None,
+        description="Maximum value of the dataset used for Normalisation.",
+    )
+
+    is_deconvoluted: Optional[bool] = Field(
+        default=False,
+        description="Whether or not Deconvolution was performed.",
+    )
+
+    is_baseline_corrected: Optional[bool] = Field(
+        default=False,
+        description="Whether or not Baseline correction was performed.",
+    )
diff --git a/nmrpy/datamodel/core/protein.py b/nmrpy/datamodel/core/protein.py
new file mode 100644
index 0000000..efcc389
--- /dev/null
+++ b/nmrpy/datamodel/core/protein.py
@@ -0,0 +1,57 @@
+import sdRDM
+
+from typing import Optional
+from pydantic import Field
+from sdRDM.base.utils import forge_signature, IDGenerator
+
+
+from .sboterm import SBOTerm
+
+
+@forge_signature
+class Protein(sdRDM.DataModel):
+    """This object describes the proteins that were used or produced in the course of the experiment."""
+
+    id: Optional[str] = Field(
+        description="Unique identifier of the given object.",
+        default_factory=IDGenerator("proteinINDEX"),
+        xml="@id",
+    )
+
+    sequence: str = Field(
+        ...,
+        description="Amino acid sequence of the protein",
+        template_alias="Sequence",
+    )
+
+    ecnumber: Optional[str] = Field(
+        default=None,
+        description="EC number of the protein.",
+        regex=r"(\d+\.)(\d+\.)(\d+\.)(\d+)",  # raw string; dots escaped so only "n.n.n.n" matches
+        template_alias="EC Number",
+    )
+
+    organism: Optional[str] = Field(
+        default=None,
+        description="Organism the protein was expressed in.",
+        template_alias="Source organism",
+    )
+
+    organism_tax_id: Optional[str] = Field(
+        default=None,
+        description="Taxonomy identifier of the expression host.",
+    )
+
+    uniprotid: Optional[str] = Field(
+        default=None,
+        description=(
+            "Unique identifier referencing a protein entry at UniProt. Use this"
+            " identifier to initialize the object from the UniProt database."
+        ),
+        template_alias="UniProt ID",
+    )
+
+    ontology: SBOTerm = Field(
+        description="None",  # NOTE(review): looks like a stringified missing description — confirm
+        default=SBOTerm.CATALYST,
+    )
diff --git a/nmrpy/datamodel/core/publication.py b/nmrpy/datamodel/core/publication.py
index 3d34597..4c41d8c 100644
--- a/nmrpy/datamodel/core/publication.py
+++ b/nmrpy/datamodel/core/publication.py
@@ -7,17 +7,16 @@
 
 from pydantic import AnyUrl
 
-from .person import Person
 from .publicationtypes import PublicationTypes
+from .person import Person
 from .identifiertypes import IdentifierTypes
 
 
 @forge_signature
 class Publication(sdRDM.DataModel):
-
     """Container for citation information of a relevant publication."""
 
-    id: str = Field(
+    id: Optional[str] = Field(
         description="Unique identifier of the given object.",
         default_factory=IDGenerator("publicationINDEX"),
         xml="@id",
@@ -88,3 +87,5 @@ def add_to_authors(
             params["id"] = id
 
         self.authors.append(Person(**params))
+
+        return self.authors[-1]
diff --git a/nmrpy/datamodel/core/reactant.py b/nmrpy/datamodel/core/reactant.py
new file mode 100644
index 0000000..faf65c1
--- /dev/null
+++ b/nmrpy/datamodel/core/reactant.py
@@ -0,0 +1,49 @@
+import sdRDM
+
+from typing import Optional
+from pydantic import Field
+from sdRDM.base.utils import forge_signature, IDGenerator
+
+
+from .sboterm import SBOTerm
+
+
+@forge_signature
+class Reactant(sdRDM.DataModel):
+    """This object describes the reactants that were used or produced in the course of the experiment."""
+
+    id: Optional[str] = Field(
+        description="Unique identifier of the given object.",
+        default_factory=IDGenerator("reactantINDEX"),
+        xml="@id",
+    )
+
+    smiles: Optional[str] = Field(
+        default=None,
+        description=(
+            "Simplified Molecular Input Line Entry System (SMILES) encoding of the"
+            " reactant."
+        ),
+        template_alias="SMILES",
+    )
+
+    inchi: Optional[str] = Field(
+        default=None,
+        description=(
+            "International Chemical Identifier (InChI) encoding of the reactant."
+        ),
+        template_alias="InCHI",  # NOTE(review): description spells it "InChI" — confirm the alias casing
+    )
+
+    chebi_id: Optional[str] = Field(
+        default=None,
+        description=(
+            "Unique identifier of the CHEBI database. Use this identifier to initialize"
+            " the object from the CHEBI database."
+        ),
+    )
+
+    ontology: SBOTerm = Field(
+        description="None",  # NOTE(review): looks like a stringified missing description — confirm
+        default=SBOTerm.SMALL_MOLECULE,
+    )
diff --git a/nmrpy/datamodel/core/sboterm.py b/nmrpy/datamodel/core/sboterm.py
new file mode 100644
index 0000000..74d2eb6
--- /dev/null
+++ b/nmrpy/datamodel/core/sboterm.py
@@ -0,0 +1,35 @@
+from enum import Enum
+
+
+class SBOTerm(Enum):  # Closed set of named "SBO:<7 digits>" ontology term identifiers.
+    BIOCHEMICAL_REACTION = "SBO:0000176"
+    ACID_BASE_REACTION = "SBO:0000208"
+    CONFORMATIONAL_TRANSITION = "SBO:0000181"
+    CONVERSION = "SBO:0000182"
+    DEGRADATION = "SBO:0000179"
+    DISSOCIATION = "SBO:0000180"
+    IONISATION = "SBO:0000209"
+    ISOMERISATION = "SBO:0000377"
+    NON_COVALENT_BINDING = "SBO:0000177"
+    REDOX_REACTION = "SBO:0000200"
+    SPONTANEOUS_REACTION = "SBO:0000672"
+    PROTEIN = "SBO:0000252"
+    GENE = "SBO:0000251"
+    SMALL_MOLECULE = "SBO:0000247"
+    ION = "SBO:0000327"
+    RADICAL = "SBO:0000328"
+    INTERACTOR = "SBO:0000336"
+    SUBSTRATE = "SBO:0000015"
+    PRODUCT = "SBO:0000011"
+    CATALYST = "SBO:0000013"
+    INHIBITOR = "SBO:0000020"
+    ESSENTIAL_ACTIVATOR = "SBO:0000461"
+    NON_ESSENTIAL_ACTIVATOR = "SBO:0000462"
+    POTENTIATOR = "SBO:0000021"
+    MACROMOLECULAR_COMPLEX = "SBO:0000296"
+    PROTEIN_COMPLEX = "SBO:0000297"
+    DIMER = "SBO:0000607"
+    MICHAELIS_MENTEN = "SBO:0000028"
+    K_CAT = "SBO:0000025"
+    K_M = "SBO:0000027"
+    V_MAX = "SBO:0000186"
diff --git a/nmrpy/datamodel/core/term.py b/nmrpy/datamodel/core/term.py
index 6a48293..82c67e8 100644
--- a/nmrpy/datamodel/core/term.py
+++ b/nmrpy/datamodel/core/term.py
@@ -9,10 +9,9 @@
 
 @forge_signature
 class Term(sdRDM.DataModel):
-
     """lorem ipsum {Add reference back to term_cv_reference.}"""
 
-    id: str = Field(
+    id: Optional[str] = Field(
         description="Unique identifier of the given object.",
         default_factory=IDGenerator("termINDEX"),
         xml="@id",
diff --git a/nmrpy/datamodel/core/vessel.py b/nmrpy/datamodel/core/vessel.py
new file mode 100644
index 0000000..5dc6fb5
--- /dev/null
+++ b/nmrpy/datamodel/core/vessel.py
@@ -0,0 +1,52 @@
+import sdRDM
+
+from typing import Optional
+from pydantic import Field
+from sdRDM.base.utils import forge_signature, IDGenerator
+
+from pydantic import StrictBool
+from pydantic import PositiveFloat
+
+
+@forge_signature
+class Vessel(sdRDM.DataModel):
+    """This object describes vessels in which the experiment has been carried out. These can include any type of vessel used in biocatalytic experiments."""
+
+    id: Optional[str] = Field(
+        description="Unique identifier of the given object.",
+        default_factory=IDGenerator("vesselINDEX"),
+        xml="@id",
+    )
+
+    name: str = Field(
+        ...,
+        description="Name of the used vessel.",
+        template_alias="Name",
+    )
+
+    volume: PositiveFloat = Field(  # PositiveFloat: pydantic rejects values <= 0
+        ...,
+        description="Volumetric value of the vessel.",
+        template_alias="Volume value",
+    )
+
+    unit: str = Field(  # plain string; no unit validation is declared here
+        ...,
+        description="Volumetric unit of the vessel.",
+        template_alias="Volume unit",
+    )
+
+    constant: StrictBool = Field(  # StrictBool: only real booleans are accepted, no coercion
+        description="Whether the volume of the vessel is constant or not.",
+        default=True,
+    )
+
+    uri: Optional[str] = Field(
+        default=None,
+        description="URI of the vessel.",
+    )
+
+    creator_id: Optional[str] = Field(
+        default=None,
+        description="Unique identifier of the author.",
+    )
diff --git a/nmrpy/datamodel/schemes/datamodel_schema.md b/nmrpy/datamodel/schemes/datamodel_schema.md
index 527eb3a..abbf56d 100644
--- a/nmrpy/datamodel/schemes/datamodel_schema.md
+++ b/nmrpy/datamodel/schemes/datamodel_schema.md
@@ -1,10 +1,46 @@
 ```mermaid
 classDiagram
+    AbstractSpecies <-- Protein
+    AbstractSpecies <-- Complex
+    AbstractSpecies <-- Reactant
+    AbstractSpecies <-- Protein
+    AbstractSpecies <-- Reactant
+    EnzymeMLDocument *-- Creator
+    EnzymeMLDocument *-- Vessel
+    EnzymeMLDocument *-- Protein
+    EnzymeMLDocument *-- Complex
+    EnzymeMLDocument *-- Reactant
+    EnzymeMLDocument *-- Reaction
+    EnzymeMLDocument *-- KineticParameter
+    EnzymeMLDocument *-- Measurement
+    EnzymeMLDocument *-- File
+    AbstractSpecies *-- Vessel
+    Protein *-- SBOTerm
+    Complex *-- SBOTerm
+    Reactant *-- SBOTerm
+    Reaction *-- SBOTerm
+    Reaction *-- ReactionElement
+    Reaction *-- KineticModel
+    ReactionElement *-- SBOTerm
+    ReactionElement *-- AbstractSpecies
+    KineticModel *-- SBOTerm
+    KineticModel *-- KineticParameter
+    KineticParameter *-- SBOTerm
+    Measurement *-- MeasurementData
+    MeasurementData *-- AbstractSpecies
+    MeasurementData *-- Replicate
+    Replicate *-- DataTypes
+    Replicate *-- AbstractSpecies
     NMRpy *-- Experiment
     NMRpy *-- Citation
     Experiment *-- FID
     Experiment *-- FIDArray
     FID *-- Parameters
+    FID *-- ProcessingSteps
+    FID *-- Identity
+    Identity *-- AbstractSpecies
+    Identity *-- Protein
+    Identity *-- Reactant
     Citation *-- Subjects
     Citation *-- Person
     Citation *-- Publication
@@ -12,6 +48,9 @@ classDiagram
     Person *-- IdentifierTypes
     Publication *-- PublicationTypes
     Publication *-- Person
+    AbstractSpecies *-- Vessel
+    Protein *-- SBOTerm
+    Reactant *-- SBOTerm
     
     class NMRpy {
         +datetime datetime_created*
@@ -27,8 +66,11 @@ classDiagram
     }
     
     class FID {
-        +float[0..*] data
-        +Parameters parameters
+        +string[0..*] raw_data
+        +string, float[0..*] processed_data
+        +Parameters nmr_parameters
+        +ProcessingSteps processing_steps
+        +Identity[0..*] peak_identities
     }
     
     class Parameters {
@@ -44,6 +86,30 @@ classDiagram
         +float spectral_width_left
     }
     
+    class ProcessingSteps {
+        +boolean is_apodised
+        +float apodisation_frequency
+        +boolean is_zero_filled
+        +boolean is_fourier_transformed
+        +string fourier_transform_type
+        +boolean is_phased
+        +float zero_order_phase
+        +float first_order_phase
+        +boolean is_only_real
+        +boolean is_normalised
+        +float max_value
+        +boolean is_deconvoluted
+        +boolean is_baseline_corrected
+    }
+    
+    class Identity {
+        +string name*
+        +AbstractSpecies, Protein, Reactant enzymeml_species
+        +float[0..*] associated_peaks
+        +frozenset[0..*] associated_ranges
+        +float[0..*] associated_integrals
+    }
+    
     class FIDArray {
         +string[0..*] fids*
     }
@@ -93,6 +159,41 @@ classDiagram
         +any value
     }
     
+    class Vessel {
+        +string name*
+        +posfloat volume*
+        +string unit*
+        +StrictBool constant*
+        +string uri
+        +string creator_id
+    }
+    
+    class AbstractSpecies {
+        +string name*
+        +Vessel vessel_id*
+        +float init_conc
+        +StrictBool constant*
+        +string unit
+        +string uri
+        +string creator_id
+    }
+    
+    class Protein {
+        +string sequence*
+        +string ecnumber
+        +string organism
+        +string organism_tax_id
+        +string uniprotid
+        +SBOTerm ontology*
+    }
+    
+    class Reactant {
+        +string smiles
+        +string inchi
+        +string chebi_id
+        +SBOTerm ontology*
+    }
+    
     class FileFormats {
         << Enumeration >>
         +VARIAN
@@ -118,4 +219,54 @@ classDiagram
         +ORCID
     }
     
+    class SBOTerm {
+        << Enumeration >>
+        +BIOCHEMICAL_REACTION
+        +ACID_BASE_REACTION
+        +CONFORMATIONAL_TRANSITION
+        +CONVERSION
+        +DEGRADATION
+        +DISSOCIATION
+        +IONISATION
+        +ISOMERISATION
+        +NON_COVALENT_BINDING
+        +REDOX_REACTION
+        +SPONTANEOUS_REACTION
+        +PROTEIN
+        +GENE
+        +SMALL_MOLECULE
+        +ION
+        +RADICAL
+        +INTERACTOR
+        +SUBSTRATE
+        +PRODUCT
+        +CATALYST
+        +INHIBITOR
+        +ESSENTIAL_ACTIVATOR
+        +NON_ESSENTIAL_ACTIVATOR
+        +POTENTIATOR
+        +MACROMOLECULAR_COMPLEX
+        +PROTEIN_COMPLEX
+        +DIMER
+        +MICHAELIS_MENTEN
+        +K_CAT
+        +K_M
+        +V_MAX
+    }
+    
+    class DataTypes {
+        << Enumeration >>
+        +CONCENTRATION
+        +ABSORPTION
+        +FEED
+        +BIOMASS
+        +CONVERSION
+        +PEAK_AREA
+    }
+    
+    class https://github.com/EnzymeML/enzymeml-specifications/ {
+        << External Object >>
+        +Repository https://github.com/EnzymeML/enzymeml-specifications/
+    }
+    
 ```
\ No newline at end of file
diff --git a/nmrpy/plotting.py b/nmrpy/plotting.py
index fa2fb70..5c63b17 100644
--- a/nmrpy/plotting.py
+++ b/nmrpy/plotting.py
@@ -24,6 +24,15 @@
 import asyncio
 
 
+SPECIES_DECOY = [  # Options assigned to the species dropdowns; presumably placeholder names.
+    "TEP",
+    "PEP",
+    "3PG",
+    "2PG",
+    "Phosphate",
+]
+
+
 class Plot:
     """
     Basic 'plot' class containing functions for various types of plots.
@@ -1419,23 +1428,18 @@ def __init__(self, fid, title):
             description="Save selection", icon="file-arrow-down", disabled=True
         )
 
+        # Create a reset button
+        reset_button = Button(description="Reset selection", disabled=True)
+
         # Create an output widget to display the selection
         selection_output = Output()
 
         # Define a method to handle the peak dropdown's change event
         def on_peak_dropdown_change(event):
             if event["type"] == "change" and event["name"] == "value":
-                selected_option = event["new"]
-                if selected_option != "":
-                    species_dropdown.options = [
-                        "3PG",
-                        "2PG",
-                        "Phosphate",
-                        "TEP",
-                        "PEP",
-                    ]
-                    species_dropdown.disabled = False
-                    save_button.disabled = False
+                species_dropdown.options = SPECIES_DECOY
+                species_dropdown.disabled = False
+                save_button.disabled = False
 
         # Attach the function to the dropdown's change event
         peak_dropdown.observe(on_peak_dropdown_change)
@@ -1444,9 +1448,8 @@ def on_peak_dropdown_change(event):
         def on_species_dropdown_change(event):
             if event["type"] == "change" and event["name"] == "value":
                 selected_option = event["new"]
-                if selected_option != "":
-                    new_key = peak_dropdown.value
-                    self.selected_values[new_key] = selected_option
+                new_key = peak_dropdown.value
+                self.selected_values[new_key] = selected_option
 
         # Attach the function to the second dropdown's change event
         species_dropdown.observe(on_species_dropdown_change)
@@ -1461,10 +1464,22 @@ def on_save_button_click(b):
                 self.fid.identities = [
                     value for value in self.selected_values.values()
                 ]
+            reset_button.disabled = False
 
         # Attach the function to the save button's click event
         save_button.on_click(on_save_button_click)
 
+        # Define a function to handle the reset event
+        def on_reset_button_click(b):
+            with selection_output:
+                selection_output.clear_output(wait=True)
+                print("\nCleared selections!")
+                self.fid.identities = []
+                self.selected_values = {}
+
+        # Attach the function to the reset click event
+        reset_button.on_click(on_reset_button_click)
+
         # Create a container for both the title and the dropdown
         container = VBox(
             [
@@ -1472,6 +1487,7 @@ def on_save_button_click(b):
                 peak_dropdown,
                 species_dropdown,
                 save_button,
+                reset_button,
                 selection_output,
             ]
         )
@@ -1530,6 +1546,9 @@ def __init__(self, fid_array):
             description="Save selection", icon="file-arrow-down", disabled=True
         )
 
+        # Create a reset button
+        reset_button = Button(description="Reset selection", disabled=True)
+
         # Create an output widget to display the selection
         selection_output = Output()
 
@@ -1550,15 +1569,7 @@ def on_combobox_change(event):
         # Define a method to handle the peak dropdown's change event
         def on_peak_dropdown_change(event):
             if event["type"] == "change" and event["name"] == "value":
-                selected_option = event["new"]
-                if selected_option != "":
-                    species_dropdown.options = [
-                        "3PG",
-                        "2PG",
-                        "Phosphate",
-                        "TEP",
-                        "PEP",
-                    ]
+                species_dropdown.options = SPECIES_DECOY
                 species_dropdown.disabled = False
                 save_button.disabled = False
 
@@ -1569,9 +1580,8 @@ def on_peak_dropdown_change(event):
         def on_species_dropdown_change(event):
             if event["type"] == "change" and event["name"] == "value":
                 selected_option = event["new"]
-                if selected_option != "":
-                    new_key = peak_dropdown.value
-                    self.selected_values[new_key] = selected_option
+                new_key = peak_dropdown.value
+                self.selected_values[new_key] = selected_option
 
         # Attach the function to the second dropdown's change event
         species_dropdown.observe(on_species_dropdown_change)
@@ -1587,10 +1597,23 @@ def on_save_button_click(b):
                     fid.identities = [
                         value for value in self.selected_values.values()
                     ]
+            reset_button.disabled = False
 
         # Attach the function to the save button's click event
         save_button.on_click(on_save_button_click)
 
+        # Define a function to handle the reset event
+        def on_reset_button_click(b):
+            with selection_output:
+                selection_output.clear_output(wait=True)
+                print("\nCleared selections!")
+                for fid in self.fids:
+                    fid.identities = []
+                self.selected_values = {}
+
+        # Attach the function to the reset click event
+        reset_button.on_click(on_reset_button_click)
+
         # Create a container for both the title and the dropdown
         container = VBox(
             [
@@ -1599,6 +1622,7 @@ def on_save_button_click(b):
                 peak_dropdown,
                 species_dropdown,
                 save_button,
+                reset_button,
                 selection_output,
             ]
         )
diff --git a/specifications/nmrpy.md b/specifications/nmrpy.md
index 97d15e9..9e64e47 100644
--- a/specifications/nmrpy.md
+++ b/specifications/nmrpy.md
@@ -32,6 +32,10 @@ Also preparation of EnzymeML doc
 - __name__
   - Type: string
   - Description: A descriptive name for the overarching experiment.
+- enzymeml_species
+  - Type: https://github.com/EnzymeML/enzymeml-specifications/@AbstractSpecies, https://github.com/EnzymeML/enzymeml-specifications/@Protein, https://github.com/EnzymeML/enzymeml-specifications/@Reactant
+  - Description: A species object from an EnzymeML document.
+  - Multiple: True
 - fid
   - Type: [FID](#fid)
   - Description: A single NMR spectrum.
@@ -45,13 +49,24 @@ Also preparation of EnzymeML doc
 
 Container for a single NMR spectrum.
 
-- data
-  - Type: float
-  - Description: Spectral data from numpy array.
+- raw_data
+  - Type: string
+  - Description: Complex spectral data from numpy array as string of format `{array.real}+{array.imag}j`.
+  - Multiple: True
+- processed_data
+  - Type: string,float
+  - Description: Processed data array.
   - Multiple: True
-- parameters
+- nmr_parameters
   - Type: [Parameters](#parameters)
   - Description: Contains commonly-used NMR parameters.
+- processing_steps
+  - Type: [ProcessingSteps](#processingsteps)
+  - Description: Contains the processing steps performed, as well as the parameters used for them.
+- peak_identities
+  - Type: [Identity](#identity)
+  - Description: Container holding and mapping integrals resulting from peaks and their ranges to EnzymeML species.
+  - Multiple: True
 
 
 ### Parameters
@@ -92,6 +107,82 @@ Container for relevant NMR parameters.
   - Description: sw_left
 
 
+### ProcessingSteps
+
+Container for the processing steps performed, as well as the parameters used for them.
+
+- is_apodised
+  - Type: boolean
+  - Description: Whether or not Apodisation (line-broadening) has been performed.
+- apodisation_frequency
+  - Type: float
+  - Description: Degree of Apodisation (line-broadening) in Hz.
+- is_zero_filled
+  - Type: boolean
+  - Description: Whether or not Zero-filling has been performed.
+  - Default: False
+- is_fourier_transformed
+  - Type: boolean
+  - Description: Whether or not Fourier transform has been performed.
+  - Default: False
+- fourier_transform_type
+  - Type: string
+  - Description: The type of Fourier transform used.
+- is_phased
+  - Type: boolean
+  - Description: Whether or not Phasing was performed.
+  - Default: False
+- zero_order_phase
+  - Type: float
+  - Description: Zero-order phase used for Phasing.
+- first_order_phase
+  - Type: float
+  - Description: First-order phase used for Phasing.
+- is_only_real
+  - Type: boolean
+  - Description: Whether or not the imaginary part has been discarded.
+  - Default: False
+- is_normalised
+  - Type: boolean
+  - Description: Whether or not Normalisation was performed.
+  - Default: False
+- max_value
+  - Type: float
+  - Description: Maximum value of the dataset used for Normalisation.
+- is_deconvoluted
+  - Type: boolean
+  - Description: Whether or not Deconvolution was performed.
+  - Default: False
+- is_baseline_corrected
+  - Type: boolean
+  - Description: Whether or not Baseline correction was performed.
+  - Default: False
+
+
+### Identity
+
+Container mapping one or more peaks to the respective species.
+
+- name
+  - Type: string
+  - Description: Descriptive name for the species
+- species_id
+  - Type: string
+  - Description: ID of an EnzymeML species
+- associated_peaks
+  - Type: float
+  - Description: Peaks belonging to the given species
+  - Multiple: True
+- associated_ranges
+  - Type: frozenset
+  - Description: Sets of ranges belonging to the given peaks
+  - Multiple: True
+- associated_integrals
+  - Type: float
+  - Description: Integrals resulting from the given peaks and ranges of a species
+  - Multiple: True
+
+
 ### FIDArray
 
 Container for processing of multiple spectra. Must reference the respective `FID` objects by `id`. {Add reference back. Setup time for experiment, Default 0.5}