diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index b69d94773..0bd4e1fce 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -34,7 +34,7 @@ jobs: curl -LsSf https://astral.sh/uv/install.sh | sh uv pip install coverage coveralls - name: Install nomad - if: "${{ matrix.python_version != '3.8'}}" + if: "${{ matrix.python_version != '3.8' && matrix.python_version != '3.9'}}" run: | uv pip install nomad-lab[infrastructure]@git+https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR.git - name: Install pynx diff --git a/dev-requirements.txt b/dev-requirements.txt index 1955f22f1..168170419 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -181,7 +181,7 @@ regex==2024.11.6 # via mkdocs-material requests==2.32.3 # via mkdocs-material -ruff==0.9.3 +ruff==0.9.6 # via pynxtools (pyproject.toml) scipy==1.14.1 # via ase diff --git a/src/pynxtools/nomad/entrypoints.py b/src/pynxtools/nomad/entrypoints.py index dfd957a8f..77ad67617 100644 --- a/src/pynxtools/nomad/entrypoints.py +++ b/src/pynxtools/nomad/entrypoints.py @@ -63,7 +63,7 @@ def load(self): name="pynxtools parser", description="A parser for nexus files.", mainfile_name_re=r".*\.nxs", - mainfile_mime_re="application/x-hdf5", + mainfile_mime_re="application/x-hdf*", ) from nomad.config.models.ui import ( @@ -76,7 +76,7 @@ def load(self): SearchQuantities, ) -schema = "pynxtools.nomad.schema.NeXus" +schema = "pynxtools.nomad.schema.Root" nexus_app = AppEntryPoint( name="NexusApp", @@ -105,17 +105,17 @@ def load(self): Column(quantity=f"entry_type", selected=True), Column( title="definition", - quantity=f"data.*.ENTRY[*].definition__field#{schema}", + quantity=f"data.ENTRY[*].definition__field#{schema}", selected=True, ), Column( title="start_time", - quantity=f"data.*.ENTRY[*].start_time__field#{schema}", + quantity=f"data.ENTRY[*].start_time__field#{schema}", selected=True, ), Column( title="title", - quantity=f"data.*.ENTRY[*].title__field#{schema}", + quantity=f"data.ENTRY[*].title__field#{schema}", selected=True, ), ], @@ -161,8 +161,8 @@ def load(self): "autorange": True, "nbins": 30, "scale": "linear", - "quantity": f"data.Root.datetime#{schema}", - "title": "Procesing Time", + "quantity": f"data.ENTRY.start_time__field#{schema}", + "title": "Start Time", "layout": { "lg": {"minH": 3, "minW": 3, "h": 4, "w": 12, "y": 0, "x": 0} }, @@ -177,6 +177,16 @@ def load(self): "lg": {"minH": 3, "minW": 3, "h": 8, "w": 4, "y": 0, "x": 12} }, }, + { + "type": "terms", + "show_input": False, + "scale": "linear", + "quantity": f"data.ENTRY.definition__field#{schema}", + "title": "Definition", + "layout": { + "lg": {"minH": 3, "minW": 3, "h": 8, "w": 4, "y": 0, "x": 16} + }, + }, { "type": "periodic_table", "scale": "linear", diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 2fea9afde..1718251cd 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -27,7 +27,7 @@ from nomad.datamodel import EntryArchive, EntryMetadata from nomad.datamodel.data import EntryData from nomad.datamodel.results import Material, Results - from nomad.metainfo import MSection + from nomad.metainfo import MEnum, MSection from nomad.metainfo.util import MQuantity, MSubSectionList, resolve_variadic_name from nomad.parsing import MatchingParser from nomad.units import ureg @@ -40,8 +40,10 @@ import pynxtools.nomad.schema as nexus_schema from pynxtools.nexus.nexus import HandleNexus +from pynxtools.nomad.utils import __FIELD_STATISTICS as FIELD_STATISTICS from pynxtools.nomad.utils import __REPLACEMENT_FOR_NX from pynxtools.nomad.utils import __rename_nx_for_nomad as rename_nx_for_nomad +from pynxtools.nomad.utils import get_quantity_base_name def _to_group_name(nx_node: ET.Element): @@ -60,6 +62,7 @@ def _to_section( nx_def: str, nx_node: Optional[ET.Element], current: MSection, + nx_root, ) -> MSection: """ Args: @@ -95,17 +98,25 @@ def _to_section( nomad_def_name = rename_nx_for_nomad(nomad_def_name, is_group=True) - # for groups, get the definition from the package - new_def = current.m_def.all_sub_sections[nomad_def_name] - - new_section: MSection = None # type:ignore - - for section in current.m_get_sub_sections(new_def): - if hdf_name is None or getattr(section, "nx_name", None) == hdf_name: - new_section = section - break - - if new_section is None: + if current == nx_root: + # for groups, get the definition from the package + new_def = current.m_def.all_sub_sections["ENTRY"] + for section in current.m_get_sub_sections(new_def): + if hdf_name is None or getattr(section, "nx_name", None) == hdf_name: + return section + cls = getattr(nexus_schema, nx_def, None) + sec = cls() + new_def_spec = sec.m_def.all_sub_sections[nomad_def_name] + sec.m_create(new_def_spec.section_def.section_cls) + new_section = sec.m_get_sub_section(new_def_spec, -1) + current.ENTRY.append(new_section) + new_section.__dict__["nx_name"] = hdf_name + else: + # for groups, get the definition from the package + new_def = current.m_def.all_sub_sections[nomad_def_name] + for section in current.m_get_sub_sections(new_def): + if hdf_name is None or getattr(section, "nx_name", None) == hdf_name: + return section current.m_create(new_def.section_def.section_cls) new_section = current.m_get_sub_section(new_def, -1) new_section.__dict__["nx_name"] = hdf_name @@ -120,11 +131,13 @@ def _get_value(hdf_node): hdf_value = hdf_node[...] if str(hdf_value.dtype) == "bool": + if len(hdf_value.shape) > 0: + return bool(hdf_value.tolist()[0]) return bool(hdf_value) if hdf_value.dtype.kind in "iufc": return hdf_value if len(hdf_value.shape) > 0: - return hdf_value.astype(str) + return str([i for i in hdf_value.astype(str)]) return hdf_node[()].decode() @@ -186,49 +199,52 @@ def _populate_data( attr_name = nx_attr.get("name") # could be 1D array, float or int attr_value = hdf_node.attrs[attr_name] - if not isinstance(attr_value, str): - if isinstance(attr_value, np.ndarray): - attr_value = attr_value.tolist() - if len(attr_value) == 1: - attr_value = attr_value[0] - # so values of non-scalar attribute will not end up in metainfo! - - attr_name = attr_name + "__attribute" - current = _to_section(attr_name, nx_def, nx_attr, current) - + current = _to_section(attr_name, nx_def, nx_attr, current, self.nx_root) try: if nx_root or nx_parent.tag.endswith("group"): - current.m_set_section_attribute(attr_name, attr_value) + parent_html_name = "" + parent_name = "" + parent_field_name = "" else: parent_html_name = nx_path[-2].get("name") - - parent_instance_name = hdf_node.name.split("/")[-1] + "__field" + parent_name = hdf_node.name.split("/")[-1] parent_field_name = parent_html_name + "__field" - - metainfo_def = None - try: - metainfo_def = resolve_variadic_name( - current.m_def.all_properties, parent_field_name - ) - except ValueError as exc: - self._logger.warning( - f"{current.m_def} has no suitable property for {parent_field_name}", - target_name=attr_name, - exc_info=exc, - ) - if parent_field_name in current.__dict__: - quantity = current.__dict__[parent_field_name] - if isinstance(quantity, dict): - quantity = quantity[parent_instance_name] - else: - quantity = None - raise Warning( - "setting attribute attempt before creating quantity" - ) - quantity.m_set_attribute(attr_name, attr_value) + attribute_name = parent_html_name + "___" + attr_name + data_instance_name = parent_name + "___" + attr_name + metainfo_def = None + try: + metainfo_def = resolve_variadic_name( + current.m_def.all_properties, attribute_name + ) + attribute = attr_value + # TODO: get unit from attribute _units + if isinstance(metainfo_def.type, MEnum): + attribute = str(attr_value) + elif not isinstance(attr_value, str): + if isinstance(attr_value, np.ndarray): + attr_list = attr_value.tolist() + if ( + len(attr_list) == 1 + or attr_value.dtype.kind in "iufc" + ): + attribute = attr_list[0] + else: + attribute = str(attr_list) + if metainfo_def.use_full_storage: + attribute = MQuantity.wrap(attribute, data_instance_name) + except ValueError as exc: + self._logger.warning( + f"{current.m_def} has no suitable property for {parent_field_name} and {attr_name} as {attribute_name}", + target_name=attr_name, + exc_info=exc, + ) + current.m_set(metainfo_def, attribute) + # if attributes are set before setting the quantity, a bug can cause them being set under a wrong variadic name + attribute.m_set_attribute("m_nx_data_path", hdf_node.name) + attribute.m_set_attribute("m_nx_data_file", self.nxs_fname) except Exception as e: self._logger.warning( - "error while setting attribute", + f"error while setting attribute {data_instance_name} in {current.m_def} as {metainfo_def}", target_name=attr_name, exc_info=e, ) @@ -242,6 +258,7 @@ def _populate_data( metainfo_def = resolve_variadic_name( current.m_def.all_properties, field_name ) + isvariadic = any(char.isupper() for char in metainfo_def.more["nx_name"]) # for data arrays only statistics if not all values NINF, Inf, or NaN field_stats = None @@ -249,14 +266,13 @@ def _populate_data( if isinstance(field, np.ndarray) and field.size > 1: mask = np.isfinite(field) if np.any(mask): - field_stats = np.array( - [ - np.mean(field[mask]), - np.var(field[mask]), - np.min(field[mask]), - np.max(field[mask]), - ] - ) + field_stats = [ + func(field[mask] if ismask else field) + for func, ismask in zip( + FIELD_STATISTICS["function"], + FIELD_STATISTICS["mask"], + ) + ] field = field_stats[0] if not np.isfinite(field): self._logger.warning( @@ -285,6 +301,12 @@ def _populate_data( else: pint_unit = ureg.parse_units("1") field = ureg.Quantity(field, pint_unit) + if field_stats is not None: + for i in range(len(field_stats)): + if FIELD_STATISTICS["mask"][i]: + field_stats[i] = ureg.Quantity( + field_stats[i], pint_unit + ) except (ValueError, UndefinedUnitError): pass @@ -299,14 +321,30 @@ def _populate_data( current.m_set(metainfo_def, field) field.m_set_attribute("m_nx_data_path", hdf_node.name) field.m_set_attribute("m_nx_data_file", self.nxs_fname) + if isvariadic: + concept_basename = get_quantity_base_name(field.name) + instancename = get_quantity_base_name(data_instance_name) + name_metainfo_def = resolve_variadic_name( + current.m_def.all_properties, concept_basename + "__name" + ) + name_value = MQuantity.wrap(instancename, instancename + "__name") + current.m_set(name_metainfo_def, name_value) + name_value.m_set_attribute("m_nx_data_path", hdf_node.name) + name_value.m_set_attribute("m_nx_data_file", self.nxs_fname) if field_stats is not None: - # TODO _add_additional_attributes function has created these nx_data_* - # attributes speculatively already so if the field_stats is None - # this will cause unpopulated attributes in the GUI - field.m_set_attribute("nx_data_mean", field_stats[0]) - field.m_set_attribute("nx_data_var", field_stats[1]) - field.m_set_attribute("nx_data_min", field_stats[2]) - field.m_set_attribute("nx_data_max", field_stats[3]) + concept_basename = get_quantity_base_name(field.name) + instancename = get_quantity_base_name(data_instance_name) + for suffix, stat in zip( + FIELD_STATISTICS["suffix"][1:], + field_stats[1:], + ): + stat_metainfo_def = resolve_variadic_name( + current.m_def.all_properties, concept_basename + suffix + ) + stat = MQuantity.wrap(stat, instancename + suffix) + current.m_set(stat_metainfo_def, stat) + stat.m_set_attribute("m_nx_data_path", hdf_node.name) + stat.m_set_attribute("m_nx_data_file", self.nxs_fname) except Exception as e: self._logger.warning( "error while setting field", @@ -332,12 +370,13 @@ def __nexus_populate(self, params: dict, attr=None): # pylint: disable=W0613 if nx_path is None or nx_path == "/": return - current: MSection = _to_section(None, nx_def, None, self.nx_root) + # current: MSection = _to_section(None, nx_def, None, self.nx_root) + current = self.nx_root depth: int = 1 current_hdf_path = "" for name in hdf_path.split("/")[1:]: nx_node = nx_path[depth] if depth < len(nx_path) else name - current = _to_section(name, nx_def, nx_node, current) + current = _to_section(name, nx_def, nx_node, current, self.nx_root) self._collect_class(current) depth += 1 if depth < len(nx_path): @@ -468,13 +507,15 @@ def parse( child_archives: Dict[str, EntryArchive] = None, ) -> None: self.archive = archive - self.nx_root = nexus_schema.NeXus() # type: ignore # pylint: disable=no-member + self.nx_root = nexus_schema.Root() # type: ignore # pylint: disable=no-member self.archive.data = self.nx_root self._logger = logger if logger else get_logger(__name__) self._clear_class_refs() - *_, self.nxs_fname = mainfile.rsplit("/", 1) + # if filename does not follow the pattern + # .volumes/fs/////[subdirs?]/ + self.nxs_fname = "/".join(mainfile.split("/")[6:]) or mainfile nexus_helper = HandleNexus(logger, mainfile) nexus_helper.process_nexus_master_file(self.__nexus_populate) @@ -483,25 +524,18 @@ def parse( archive.metadata = EntryMetadata() # Normalise experiment type - app_defs = str(self.nx_root).split("(")[1].split(")")[0].split(",") - app_def_list = [] - for app_elem in app_defs: - app = app_elem.lstrip() - try: - app_sec = getattr(self.nx_root, app) + # app_defs = str(self.nx_root).split("(")[1].split(")")[0].split(",") + app_def_list = set() + try: + app_entries = getattr(self.nx_root, "ENTRY") + for entry in app_entries: try: - app_entry = getattr(app_sec, "ENTRY") - if len(app_entry) < 1: - raise AttributeError() + app = entry.definition__field + app_def_list.add(rename_nx_for_nomad(app) if app else "Generic") except (AttributeError, TypeError): - app_entry = getattr(app_sec, "entry") - if len(app_entry) < 1: - raise AttributeError() - app_def_list.append( - app if app != rename_nx_for_nomad("NXroot") else "Generic" - ) - except (AttributeError, TypeError): - pass + pass + except (AttributeError, TypeError): + pass if len(app_def_list) == 0: app_def = "Experiment" else: diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index dc19f8f14..cb39d9aa6 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -33,8 +33,9 @@ try: from nomad import utils from nomad.datamodel import EntryArchive, EntryMetadata - from nomad.datamodel.data import EntryData, Schema + from nomad.datamodel.data import ArchiveSection, EntryData, Schema from nomad.datamodel.metainfo import basesections + from nomad.datamodel.metainfo.annotations import ELNAnnotation from nomad.datamodel.metainfo.basesections import ( ActivityResult, ActivityStep, @@ -81,7 +82,12 @@ from pynxtools import get_definitions_url from pynxtools.definitions.dev_tools.utils.nxdl_utils import get_nexus_definitions_path -from pynxtools.nomad.utils import __REPLACEMENT_FOR_NX, __rename_nx_for_nomad +from pynxtools.nomad.utils import ( + __FIELD_STATISTICS, + __REPLACEMENT_FOR_NX, + __rename_nx_for_nomad, + get_quantity_base_name, +) # __URL_REGEXP from # https://stackoverflow.com/questions/3809401/what-is-a-good-regular-expression-to-match-a-url @@ -101,19 +107,49 @@ __logger = get_logger(__name__) + +class NexusBaseSection(BaseSection): + def normalize(self, archive, logger): + if self.__dict__["nx_name"]: + self.name = self.__dict__["nx_name"] + super().normalize(archive, logger) + + +class NexusActivityStep(ActivityStep): + reference = Quantity( + type=ArchiveSection, + description="A reference to a NeXus Activity Step.", + a_eln=ELNAnnotation( + component="ReferenceEditQuantity", + label="section reference", + ), + ) + + +class NexusActivityResult(ActivityResult): + reference = Quantity( + type=ArchiveSection, + description="A reference to a NeXus Activity Result.", + a_eln=ELNAnnotation( + component="ReferenceEditQuantity", + label="section reference", + ), + ) + + __BASESECTIONS_MAP: Dict[str, Any] = { "NXfabrication": [basesections.Instrument], "NXsample": [CompositeSystem], "NXsample_component": [Component], "NXidentifier": [EntityReference], - "NXentry": [ActivityStep], - "NXprocess": [ActivityStep], - "NXdata": [ActivityResult], + "NXentry": [NexusActivityStep], + "NXprocess": [NexusActivityStep], + "NXdata": [NexusActivityResult], # "object": BaseSection, } -class NexusMeasurement(Measurement): +class NexusMeasurement(Measurement, Schema): def normalize(self, archive, logger): try: app_entry = getattr(self, "ENTRY") @@ -121,23 +157,19 @@ def normalize(self, archive, logger): raise AttributeError() self.steps = [] for entry in app_entry: - sec_c = entry.m_copy() - self.steps.append(sec_c) + ref = NexusActivityStep(name=entry.name, reference=entry) + self.steps.append(ref) + mapping = { + ActivityStep: (NexusActivityStep, self.steps), + basesections.Instrument: (InstrumentReference, self.instruments), + CompositeSystem: (CompositeSystemReference, self.samples), + ActivityResult: (NexusActivityResult, self.results), + } for sec in entry.m_all_contents(): - if isinstance(sec, ActivityStep): - sec_c = sec.m_copy() - self.steps.append(sec_c) - elif isinstance(sec, basesections.Instrument): - ref = InstrumentReference(name=sec.name) - ref.reference = sec - self.instruments.append(ref) - elif isinstance(sec, CompositeSystem): - ref = CompositeSystemReference(name=sec.name) - ref.reference = sec - self.samples.append(ref) - elif isinstance(sec, ActivityResult): - sec_c = sec.m_copy() - self.results.append(sec_c) + for cls, (ref_cls, collection) in mapping.items(): + if isinstance(sec, cls): + collection.append(ref_cls(name=sec.name, reference=sec)) + break if self.m_def.name == "Root": self.method = "Generic Experiment" else: @@ -158,7 +190,7 @@ def normalize(self, archive, logger): act_array = archive.workflow2.tasks existing_items = {(task.name, task.section) for task in act_array} new_items = [ - item.to_task() + item.reference.to_task() for item in self.steps if (item.name, item) not in existing_items ] @@ -177,9 +209,9 @@ def normalize(self, archive, logger): act_array = archive.workflow2.outputs existing_items = {(link.name, link.section) for link in act_array} new_items = [ - Link(name=item.name, section=item) + Link(name=item.name, section=item.reference) for item in self.results - if (item.name, item) not in existing_items + if (item.name, item.reference) not in existing_items ] act_array.extend(new_items) @@ -364,7 +396,8 @@ def __get_documentation_url( ) nx_package = xml_parent.get("nxdl_base").split("/")[-1] anchor = "-".join([name.lower() for name in reversed(anchor_segments)]) - return f"{doc_base}/{nx_package}/{anchor_segments[-1]}.html#{anchor}" + nx_file = anchor_segments[-1].replace("-", "_") + return f"{doc_base}/{nx_package}/{nx_file}.html#{anchor}" def __to_section(name: str, **kwargs) -> Section: @@ -445,16 +478,19 @@ def __add_common_properties(xml_node: ET.Element, definition: Definition): definition.more["nx_optional"] = __if_base(xml_node) -def __create_attributes(xml_node: ET.Element, definition: Union[Section, Quantity]): +def __create_attributes( + xml_node: ET.Element, definition: Union[Section, Quantity], field: Quantity = None +): """ Add all attributes in the given nexus XML node to the given - Quantity or SubSection using the Attribute class (new mechanism). + Quantity or SubSection using a specially named Quantity class. todo: account for more attributes of attribute, e.g., default, minOccurs """ for attribute in xml_node.findall("nx:attribute", __XML_NAMESPACES): name = __rename_nx_for_nomad(attribute.get("name"), is_attribute=True) + shape: list = [] nx_enum = __get_enumeration(attribute) if nx_enum: nx_type = nx_enum @@ -473,19 +509,73 @@ def __create_attributes(xml_node: ET.Element, definition: Union[Section, Quantit else: nx_shape = [] - m_attribute = Attribute( - name=name, variable=__if_template(name), shape=nx_shape, type=nx_type + a_name = (field.more["nx_name"] if field else "") + "___" + name + m_attribute = Quantity( + name=a_name, + variable=__if_template(name) + or (__if_template(field.more["nx_name"]) if field else False), + shape=shape, + type=nx_type, + flexible_unit=True, + ) + m_attribute.more.update( + dict(nx_kind="attribute") # , nx_type=nx_type, nx_shape=nx_shape) ) for name, value in attribute.items(): m_attribute.more[f"nx_{name}"] = value __add_common_properties(attribute, m_attribute) + # TODO: decide if stats/instancename should be made searchable for attributes, too + # __add_quantity_stats(definition,m_attribute) - definition.attributes.append(m_attribute) + definition.quantities.append(m_attribute) -def __add_additional_attributes(definition: Definition): +def __add_quantity_stats(container: Section, quantity: Quantity): + # TODO We should also check the shape of the quantity and the datatype as + # the statistics are always mapping on float64 even if quantity values are ints + if not quantity.name.endswith("__field"): + return + isvariadic = any(char.isupper() for char in quantity.more["nx_name"]) + notnumber = quantity.type not in [ + np.float64, + np.int64, + np.uint64, + ] and not isinstance(quantity.type, Number) + if notnumber and not isvariadic: + return + basename = get_quantity_base_name(quantity.name) + if isvariadic: + container.quantities.append( + Quantity( + name=basename + "__name", + variable=quantity.variable, + shape=[], + type=str, + description="This is a NeXus template property. " + "This quantity holds the instance name of a NeXus Field.", + ) + ) + if notnumber: + return + for suffix, dtype in zip( + __FIELD_STATISTICS["suffix"][1:], + __FIELD_STATISTICS["type"][1:], + ): + container.quantities.append( + Quantity( + name=basename + suffix, + variable=quantity.variable, + shape=[], + type=dtype if dtype else quantity.type, + description="This is a NeXus template property. " + "This quantity holds specific statistics of the NeXus data array.", + ) + ) + + +def __add_additional_attributes(definition: Definition, container: Section): if "m_nx_data_path" not in definition.attributes: definition.attributes.append( Attribute( @@ -511,31 +601,7 @@ def __add_additional_attributes(definition: Definition): ) if isinstance(definition, Quantity): - # TODO We should also check the shape of the quantity and the datatype as - # the statistics are always mapping on float64 even if quantity values are ints - if definition.type not in [np.float64, np.int64, np.uint64] and not isinstance( - definition.type, Number - ): - return - - for nx_array_attr in [ - "nx_data_mean", - "nx_data_var", - "nx_data_min", - "nx_data_max", - ]: - if nx_array_attr in definition.all_attributes: - continue - definition.attributes.append( - Attribute( - name=nx_array_attr, - variable=False, - shape=[], - type=np.float64, - description="This is a NeXus template property. " - "This attribute holds specific statistics of the NeXus data array.", - ) - ) + __add_quantity_stats(container, definition) def __create_field(xml_node: ET.Element, container: Section) -> Quantity: @@ -614,7 +680,7 @@ def __create_field(xml_node: ET.Element, container: Section) -> Quantity: container.quantities.append(value_quantity) - __create_attributes(xml_node, value_quantity) + __create_attributes(xml_node, container, value_quantity) return value_quantity @@ -633,7 +699,9 @@ def __create_group(xml_node: ET.Element, root_section: Section): nx_type = __rename_nx_for_nomad(xml_attrs["type"]) nx_name = xml_attrs.get("name", nx_type.upper()) - section_name = __rename_nx_for_nomad(nx_name, is_group=True) + section_name = ( + root_section.name + "__" + __rename_nx_for_nomad(nx_name, is_group=True) + ) group_section = Section(validate=VALIDATE, nx_kind="group", name=section_name) __attach_base_section(group_section, root_section, __to_section(nx_type)) @@ -651,8 +719,7 @@ def __create_group(xml_node: ET.Element, root_section: Section): variable=__if_template(nx_name), ) - root_section.inner_section_definitions.append(group_section) - + __section_definitions[section_name] = group_section root_section.sub_sections.append(group_subsection) __create_group(group, group_section) @@ -707,8 +774,13 @@ def __attach_base_section(section: Section, container: Section, default: Section a base-section with a suitable base. """ try: + newdefinitions = {} + for def_name, act_def in container.all_sub_sections.items(): + newdefinitions[def_name] = act_def.sub_section base_section = nexus_resolve_variadic_name( - container.all_inner_section_definitions, section.name, filter=default + newdefinitions, + section.name.split("__")[-1], + filter=default, ) except ValueError: base_section = None @@ -739,7 +811,7 @@ def __create_class_section(xml_node: ET.Element) -> Section: [NexusMeasurement] if xml_attrs["extends"] == "NXobject" else [] ) else: - nomad_base_sec_cls = __BASESECTIONS_MAP.get(nx_name, [BaseSection]) + nomad_base_sec_cls = __BASESECTIONS_MAP.get(nx_name, [NexusBaseSection]) nx_name = __rename_nx_for_nomad(nx_name) class_section: Section = __to_section( @@ -855,7 +927,7 @@ def __add_section_from_nxdl(xml_node: ET.Element) -> Optional[Section]: return None -def __create_package_from_nxdl_directories(nexus_section: Section) -> Package: +def __create_package_from_nxdl_directories() -> Package: """ Creates a metainfo package from the given nexus directory. Will generate the respective metainfo definitions from all the nxdl files in that directory. @@ -875,16 +947,27 @@ def __create_package_from_nxdl_directories(nexus_section: Section) -> Package: sections.append(section) sections.sort(key=lambda x: x.name) + nexus_sections = {} + for section_name in ["_Applications", "_BaseSections"]: # , '_InnerSections']: + nexus_sections[section_name] = Section(validate=VALIDATE, name=section_name) + package.section_definitions.append(nexus_sections[section_name]) for section in sections: package.section_definitions.append(section) - if section.nx_category == "application" or ( - section.nx_category == "base" and section.nx_name == "NXroot" - ): - nexus_section.sub_sections.append( + if section.nx_category == "application" or section.nx_name == "NXroot": + key = "_Applications" + elif section.nx_category == "base": + key = "_BaseSections" + else: + key = None + + if key: + nexus_sections[key].sub_sections.append( SubSection(section_def=section, name=section.name) ) + for section_name, section in __section_definitions.items(): + if "__" in section_name: + package.section_definitions.append(section) - package.section_definitions.append(nexus_section) return package @@ -916,14 +999,6 @@ def init_nexus_metainfo(): if nexus_metainfo_package is not None: return - # We take the application definitions and create a common parent section that allows - # to include nexus in an EntryArchive. - # To be able to register it into data section, it is expected that this section inherits from Schema. - nexus_section = Section( - validate=VALIDATE, name=__GROUPING_NAME, label=__GROUPING_NAME - ) - nexus_section.base_sections = [Schema.m_def] - # try: # load_nexus_schema('') # except Exception: @@ -932,8 +1007,11 @@ def init_nexus_metainfo(): # save_nexus_schema('') # except Exception: # pass - nexus_metainfo_package = __create_package_from_nxdl_directories(nexus_section) + nexus_metainfo_package = __create_package_from_nxdl_directories() nexus_metainfo_package.section_definitions.append(NexusMeasurement.m_def) + nexus_metainfo_package.section_definitions.append(NexusActivityStep.m_def) + nexus_metainfo_package.section_definitions.append(NexusActivityResult.m_def) + nexus_metainfo_package.section_definitions.append(NexusBaseSection.m_def) # We need to initialize the metainfo definitions. This is usually done automatically, # when the metainfo schema is defined though MSection Python classes. @@ -953,9 +1031,9 @@ def init_nexus_metainfo(): for section in sections: if not (str(section).startswith("pynxtools.")): continue - __add_additional_attributes(section) + __add_additional_attributes(section, None) for quantity in section.quantities: - __add_additional_attributes(quantity) + __add_additional_attributes(quantity, section) # We skip the Python code generation for now and offer Python classes as variables # TO DO not necessary right now, could also be done case-by-case by the nexus parser @@ -972,6 +1050,13 @@ def normalize_fabrication(self, archive, logger): current_cls = __section_definitions[ __rename_nx_for_nomad("NXfabrication") ].section_cls + self.name = ( + self.__dict__["nx_name"] + + " (" + + ((self.vendor__field + " / ") if self.vendor__field else "") + + (self.model__field if self.model__field else "") + + ")" + ) super(current_cls, self).normalize(archive, logger) diff --git a/src/pynxtools/nomad/utils.py b/src/pynxtools/nomad/utils.py index 794a94e60..2174d7c47 100644 --- a/src/pynxtools/nomad/utils.py +++ b/src/pynxtools/nomad/utils.py @@ -16,7 +16,9 @@ # limitations under the License. # -from typing import Optional +from typing import Dict, Optional + +import numpy as np __REPLACEMENT_FOR_NX = "" @@ -79,6 +81,21 @@ def __rename_nx_for_nomad( elif is_field: name += "__field" elif is_attribute: - name += "__attribute" - + pass return name + + +def get_quantity_base_name(quantity_name): + return ( + quantity_name[:-7] + if quantity_name.endswith("__field") and quantity_name[-8] != "_" + else quantity_name + ) + + +__FIELD_STATISTICS: Dict[str, list] = { + "suffix": ["__mean", "__std", "__min", "__max", "__size", "__ndim"], + "function": [np.mean, np.std, np.min, np.max, np.size, np.ndim], + "type": [np.float64, np.float64, None, None, np.int32, np.int32], + "mask": [True, True, True, True, False, False], +} diff --git a/tests/nomad/test_parsing.py b/tests/nomad/test_parsing.py index 8a71f9af3..e050448a7 100644 --- a/tests/nomad/test_parsing.py +++ b/tests/nomad/test_parsing.py @@ -41,7 +41,7 @@ def test_nexus_example(): example_data = "src/pynxtools/data/201805_WSe2_arpes.nxs" NexusParser().parse(example_data, archive, get_logger(__name__)) - arpes_obj = getattr(archive.data, rename_nx_for_nomad("NXarpes")) + arpes_obj = archive.data assert arpes_obj.ENTRY[0].SAMPLE[0].pressure__field == ureg.Quantity( "3.27e-10*millibar" @@ -72,12 +72,10 @@ def test_nexus_example(): assert data.energies__field.check("eV") # manual name resolution assert data.AXISNAME__field["angles__field"] is not None - assert ( - data.AXISNAME__field["angles__field"].attributes["nx_data_max"] - == 2.168025463513032 - ) + assert data.AXISNAME__max["angles__max"].value == 2.168025463513032 assert (1 * data.AXISNAME__field["angles__field"].unit).check("1/Å") assert (1 * data.AXISNAME__field["delays__field"].unit).check("fs") + assert data.___axes == "['angles', 'energies', 'delays']" def test_same_name_field_and_group(): @@ -94,9 +92,9 @@ def test_nexus_example_with_renamed_groups(): os.path.dirname(__file__), "../data/nomad/NXlauetof.hdf5" ) NexusParser().parse(lauetof_data, archive, get_logger(__name__)) - lauetof_obj = getattr(archive.data, rename_nx_for_nomad("NXlauetof")) + lauetof_obj = archive.data - assert lauetof_obj.entry.name__group.time_of_flight__field == ureg.Quantity( + assert lauetof_obj.ENTRY[0].name__group.time_of_flight__field == ureg.Quantity( "1.0*second" ) - assert lauetof_obj.entry.sample.name__field == "SAMPLE-CHAR-DATA" + assert lauetof_obj.ENTRY[0].sample.name__field == "SAMPLE-CHAR-DATA"