From 36a121e47bafbba5bfa5d7a4cb71fd2cb4c51ce9 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Fri, 10 Jan 2025 22:38:08 +0100 Subject: [PATCH 01/31] use pynxtools.nomad.schema.Root --- src/pynxtools/nomad/entrypoints.py | 22 ++++++++++---- src/pynxtools/nomad/parser.py | 49 ++++++++++++++++-------------- src/pynxtools/nomad/schema.py | 2 +- 3 files changed, 44 insertions(+), 29 deletions(-) diff --git a/src/pynxtools/nomad/entrypoints.py b/src/pynxtools/nomad/entrypoints.py index dfd957a8f..66b3cb843 100644 --- a/src/pynxtools/nomad/entrypoints.py +++ b/src/pynxtools/nomad/entrypoints.py @@ -76,7 +76,7 @@ def load(self): SearchQuantities, ) -schema = "pynxtools.nomad.schema.NeXus" +schema = "pynxtools.nomad.schema.Root" nexus_app = AppEntryPoint( name="NexusApp", @@ -105,17 +105,17 @@ def load(self): Column(quantity=f"entry_type", selected=True), Column( title="definition", - quantity=f"data.*.ENTRY[*].definition__field#{schema}", + quantity=f"data.ENTRY[*].definition__field#{schema}", selected=True, ), Column( title="start_time", - quantity=f"data.*.ENTRY[*].start_time__field#{schema}", + quantity=f"data.ENTRY[*].start_time__field#{schema}", selected=True, ), Column( title="title", - quantity=f"data.*.ENTRY[*].title__field#{schema}", + quantity=f"data.ENTRY[*].title__field#{schema}", selected=True, ), ], @@ -161,8 +161,8 @@ def load(self): "autorange": True, "nbins": 30, "scale": "linear", - "quantity": f"data.Root.datetime#{schema}", - "title": "Procesing Time", + "quantity": f"data.ENTRY.start_time__field#{schema}", + "title": "Start Time", "layout": { "lg": {"minH": 3, "minW": 3, "h": 4, "w": 12, "y": 0, "x": 0} }, @@ -177,6 +177,16 @@ def load(self): "lg": {"minH": 3, "minW": 3, "h": 8, "w": 4, "y": 0, "x": 12} }, }, + { + "type": "terms", + "show_input": False, + "scale": "linear", + "quantity": f"data.ENTRY.definition__field#{schema}", + "title": "Definition", + "layout": { + "lg": {"minH": 3, "minW": 3, "h": 8, "w": 4, "y": 0, "x": 16} + }, + }, { "type": "periodic_table", "scale": "linear", diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 2fea9afde..00ec67b61 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -60,6 +60,7 @@ def _to_section( nx_def: str, nx_node: Optional[ET.Element], current: MSection, + nx_root, ) -> MSection: """ Args: @@ -105,7 +106,17 @@ def _to_section( new_section = section break - if new_section is None: + if new_section is not None: + return new_section + if current == nx_root: + cls = getattr(nexus_schema, nx_def, None) + sec = cls() + new_def_spec = sec.m_def.all_sub_sections[nomad_def_name] + sec.m_create(new_def_spec.section_def.section_cls) + new_section = sec.m_get_sub_section(new_def_spec, -1) + current.ENTRY.append(new_section) + new_section.__dict__["nx_name"] = hdf_name + else: current.m_create(new_def.section_def.section_cls) new_section = current.m_get_sub_section(new_def, -1) new_section.__dict__["nx_name"] = hdf_name @@ -194,7 +205,7 @@ def _populate_data( # so values of non-scalar attribute will not end up in metainfo! 
attr_name = attr_name + "__attribute" - current = _to_section(attr_name, nx_def, nx_attr, current) + current = _to_section(attr_name, nx_def, nx_attr, current, self.nx_root) try: if nx_root or nx_parent.tag.endswith("group"): @@ -332,12 +343,13 @@ def __nexus_populate(self, params: dict, attr=None): # pylint: disable=W0613 if nx_path is None or nx_path == "/": return - current: MSection = _to_section(None, nx_def, None, self.nx_root) + # current: MSection = _to_section(None, nx_def, None, self.nx_root) + current = self.nx_root depth: int = 1 current_hdf_path = "" for name in hdf_path.split("/")[1:]: nx_node = nx_path[depth] if depth < len(nx_path) else name - current = _to_section(name, nx_def, nx_node, current) + current = _to_section(name, nx_def, nx_node, current, self.nx_root) self._collect_class(current) depth += 1 if depth < len(nx_path): @@ -468,7 +480,7 @@ def parse( child_archives: Dict[str, EntryArchive] = None, ) -> None: self.archive = archive - self.nx_root = nexus_schema.NeXus() # type: ignore # pylint: disable=no-member + self.nx_root = nexus_schema.Root() # type: ignore # pylint: disable=no-member self.archive.data = self.nx_root self._logger = logger if logger else get_logger(__name__) @@ -483,25 +495,18 @@ def parse( archive.metadata = EntryMetadata() # Normalise experiment type - app_defs = str(self.nx_root).split("(")[1].split(")")[0].split(",") - app_def_list = [] - for app_elem in app_defs: - app = app_elem.lstrip() - try: - app_sec = getattr(self.nx_root, app) + # app_defs = str(self.nx_root).split("(")[1].split(")")[0].split(",") + app_def_list = set() + try: + app_entries = getattr(self.nx_root, "ENTRY") + for entry in app_entries: try: - app_entry = getattr(app_sec, "ENTRY") - if len(app_entry) < 1: - raise AttributeError() + app = entry.definition__field + app_def_list.add(rename_nx_for_nomad(app) if app else "Generic") except (AttributeError, TypeError): - app_entry = getattr(app_sec, "entry") - if len(app_entry) < 1: - raise AttributeError() - app_def_list.append( - app if app != rename_nx_for_nomad("NXroot") else "Generic" - ) - except (AttributeError, TypeError): - pass + pass + except (AttributeError, TypeError): + pass if len(app_def_list) == 0: app_def = "Experiment" else: diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index dc19f8f14..ed8c8272f 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -113,7 +113,7 @@ } -class NexusMeasurement(Measurement): +class NexusMeasurement(Measurement, Schema): def normalize(self, archive, logger): try: app_entry = getattr(self, "ENTRY") From c1bc7d57c11344281dcb5fb93f4d517209922e3a Mon Sep 17 00:00:00 2001 From: sanbrock Date: Fri, 17 Jan 2025 22:28:43 +0100 Subject: [PATCH 02/31] not using inner sections --- src/pynxtools/nomad/schema.py | 48 +++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index ed8c8272f..e18c12769 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -633,7 +633,9 @@ def __create_group(xml_node: ET.Element, root_section: Section): nx_type = __rename_nx_for_nomad(xml_attrs["type"]) nx_name = xml_attrs.get("name", nx_type.upper()) - section_name = __rename_nx_for_nomad(nx_name, is_group=True) + section_name = ( + root_section.name + "__" + __rename_nx_for_nomad(nx_name, is_group=True) + ) group_section = Section(validate=VALIDATE, nx_kind="group", name=section_name) __attach_base_section(group_section, 
root_section, __to_section(nx_type)) @@ -651,8 +653,7 @@ def __create_group(xml_node: ET.Element, root_section: Section): variable=__if_template(nx_name), ) - root_section.inner_section_definitions.append(group_section) - + __section_definitions[section_name] = group_section root_section.sub_sections.append(group_subsection) __create_group(group, group_section) @@ -707,8 +708,13 @@ def __attach_base_section(section: Section, container: Section, default: Section a base-section with a suitable base. """ try: + newdefinitions = {} + for def_name, act_def in container.all_sub_sections.items(): + newdefinitions[def_name] = act_def.sub_section base_section = nexus_resolve_variadic_name( - container.all_inner_section_definitions, section.name, filter=default + newdefinitions, + section.name.split("__")[-1], + filter=default, ) except ValueError: base_section = None @@ -855,7 +861,7 @@ def __add_section_from_nxdl(xml_node: ET.Element) -> Optional[Section]: return None -def __create_package_from_nxdl_directories(nexus_section: Section) -> Package: +def __create_package_from_nxdl_directories() -> Package: """ Creates a metainfo package from the given nexus directory. Will generate the respective metainfo definitions from all the nxdl files in that directory. @@ -875,16 +881,28 @@ def __create_package_from_nxdl_directories(nexus_section: Section) -> Package: sections.append(section) sections.sort(key=lambda x: x.name) + nexus_sections = {} + for section_name in ["_Applications", "_BaseSections"]: # , '_InnerSections']: + nexus_sections[section_name] = Section(validate=VALIDATE, name=section_name) + package.section_definitions.append(nexus_sections[section_name]) for section in sections: package.section_definitions.append(section) - if section.nx_category == "application" or ( - section.nx_category == "base" and section.nx_name == "NXroot" - ): - nexus_section.sub_sections.append( + if section.nx_category == "application": + nexus_sections["_Applications"].sub_sections.append( + SubSection(section_def=section, name=section.name) + ) + elif section.nx_category == "base" and section.nx_name == "NXroot": + nexus_sections["_Applications"].sub_sections.append( SubSection(section_def=section, name=section.name) ) + elif section.nx_category == "base": + nexus_sections["_BaseSections"].sub_sections.append( + SubSection(section_def=section, name=section.name) + ) + for section_name in __section_definitions: + if "__" in section_name: + package.section_definitions.append(__section_definitions[section_name]) - package.section_definitions.append(nexus_section) return package @@ -916,14 +934,6 @@ def init_nexus_metainfo(): if nexus_metainfo_package is not None: return - # We take the application definitions and create a common parent section that allows - # to include nexus in an EntryArchive. - # To be able to register it into data section, it is expected that this section inherits from Schema. - nexus_section = Section( - validate=VALIDATE, name=__GROUPING_NAME, label=__GROUPING_NAME - ) - nexus_section.base_sections = [Schema.m_def] - # try: # load_nexus_schema('') # except Exception: @@ -932,7 +942,7 @@ def init_nexus_metainfo(): # save_nexus_schema('') # except Exception: # pass - nexus_metainfo_package = __create_package_from_nxdl_directories(nexus_section) + nexus_metainfo_package = __create_package_from_nxdl_directories() nexus_metainfo_package.section_definitions.append(NexusMeasurement.m_def) # We need to initialize the metainfo definitions. 
This is usually done automatically, From fc5b95b1976df75849ee97ceec283ad0886f75f4 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Tue, 21 Jan 2025 18:09:46 +0100 Subject: [PATCH 03/31] fix for doc links if name contains _ --- src/pynxtools/nomad/schema.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index e18c12769..742933abf 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -364,7 +364,9 @@ def __get_documentation_url( ) nx_package = xml_parent.get("nxdl_base").split("/")[-1] anchor = "-".join([name.lower() for name in reversed(anchor_segments)]) - return f"{doc_base}/{nx_package}/{anchor_segments[-1]}.html#{anchor}" + return ( + f"{doc_base}/{nx_package}/{anchor_segments[-1].replace("-", "_")}.html#{anchor}" + ) def __to_section(name: str, **kwargs) -> Section: From bfce048e016af1672848f837e53169d66ae8e9aa Mon Sep 17 00:00:00 2001 From: sanbrock Date: Tue, 21 Jan 2025 18:14:55 +0100 Subject: [PATCH 04/31] fix format --- src/pynxtools/nomad/schema.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 742933abf..66b6a3688 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -364,9 +364,8 @@ def __get_documentation_url( ) nx_package = xml_parent.get("nxdl_base").split("/")[-1] anchor = "-".join([name.lower() for name in reversed(anchor_segments)]) - return ( - f"{doc_base}/{nx_package}/{anchor_segments[-1].replace("-", "_")}.html#{anchor}" - ) + nx_file = anchor_segments[-1].replace("-", "_") + return f"{doc_base}/{nx_package}/{nx_file}.html#{anchor}" def __to_section(name: str, **kwargs) -> Section: From 274e0d63718f2d9d9f01a26816c4ca7fc8cc9782 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Tue, 21 Jan 2025 18:37:49 +0100 Subject: [PATCH 05/31] linting --- src/pynxtools/nomad/schema.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 66b6a3688..8aaf5c659 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -900,9 +900,9 @@ def __create_package_from_nxdl_directories() -> Package: nexus_sections["_BaseSections"].sub_sections.append( SubSection(section_def=section, name=section.name) ) - for section_name in __section_definitions: + for section_name, section in __section_definitions.items(): if "__" in section_name: - package.section_definitions.append(__section_definitions[section_name]) + package.section_definitions.append(section) return package From 27fa71c04d436ff59e09352b9351f73e7ea5eec6 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Tue, 21 Jan 2025 20:43:06 +0100 Subject: [PATCH 06/31] fixing tests --- src/pynxtools/nomad/parser.py | 22 ++++++++++------------ tests/nomad/test_parsing.py | 8 ++++---- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 00ec67b61..dd75c96a0 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -96,19 +96,12 @@ def _to_section( nomad_def_name = rename_nx_for_nomad(nomad_def_name, is_group=True) - # for groups, get the definition from the package - new_def = current.m_def.all_sub_sections[nomad_def_name] - - new_section: MSection = None # type:ignore - - for section in current.m_get_sub_sections(new_def): - if hdf_name is None or getattr(section, "nx_name", None) == hdf_name: - new_section = section - break - 
- if new_section is not None: - return new_section if current == nx_root: + # for groups, get the definition from the package + new_def = current.m_def.all_sub_sections["ENTRY"] + for section in current.m_get_sub_sections(new_def): + if hdf_name is None or getattr(section, "nx_name", None) == hdf_name: + return section cls = getattr(nexus_schema, nx_def, None) sec = cls() new_def_spec = sec.m_def.all_sub_sections[nomad_def_name] @@ -117,6 +110,11 @@ def _to_section( current.ENTRY.append(new_section) new_section.__dict__["nx_name"] = hdf_name else: + # for groups, get the definition from the package + new_def = current.m_def.all_sub_sections[nomad_def_name] + for section in current.m_get_sub_sections(new_def): + if hdf_name is None or getattr(section, "nx_name", None) == hdf_name: + return section current.m_create(new_def.section_def.section_cls) new_section = current.m_get_sub_section(new_def, -1) new_section.__dict__["nx_name"] = hdf_name diff --git a/tests/nomad/test_parsing.py b/tests/nomad/test_parsing.py index 8a71f9af3..b1dda4bb3 100644 --- a/tests/nomad/test_parsing.py +++ b/tests/nomad/test_parsing.py @@ -41,7 +41,7 @@ def test_nexus_example(): example_data = "src/pynxtools/data/201805_WSe2_arpes.nxs" NexusParser().parse(example_data, archive, get_logger(__name__)) - arpes_obj = getattr(archive.data, rename_nx_for_nomad("NXarpes")) + arpes_obj = archive.data assert arpes_obj.ENTRY[0].SAMPLE[0].pressure__field == ureg.Quantity( "3.27e-10*millibar" @@ -94,9 +94,9 @@ def test_nexus_example_with_renamed_groups(): os.path.dirname(__file__), "../data/nomad/NXlauetof.hdf5" ) NexusParser().parse(lauetof_data, archive, get_logger(__name__)) - lauetof_obj = getattr(archive.data, rename_nx_for_nomad("NXlauetof")) + lauetof_obj = archive.data - assert lauetof_obj.entry.name__group.time_of_flight__field == ureg.Quantity( + assert lauetof_obj.ENTRY[0].name__group.time_of_flight__field == ureg.Quantity( "1.0*second" ) - assert lauetof_obj.entry.sample.name__field == "SAMPLE-CHAR-DATA" + assert lauetof_obj.ENTRY[0].sample.name__field == "SAMPLE-CHAR-DATA" From 8fb49533810a179984acfd7686f2e4546536e648 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Fri, 24 Jan 2025 11:08:25 +0100 Subject: [PATCH 07/31] fixing mime-type for WSL --- src/pynxtools/nomad/entrypoints.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pynxtools/nomad/entrypoints.py b/src/pynxtools/nomad/entrypoints.py index 66b3cb843..77ad67617 100644 --- a/src/pynxtools/nomad/entrypoints.py +++ b/src/pynxtools/nomad/entrypoints.py @@ -63,7 +63,7 @@ def load(self): name="pynxtools parser", description="A parser for nexus files.", mainfile_name_re=r".*\.nxs", - mainfile_mime_re="application/x-hdf5", + mainfile_mime_re="application/x-hdf*", ) from nomad.config.models.ui import ( From bd22578f7e69662caf7e6538bbe11f1f0c0c91b4 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Fri, 24 Jan 2025 12:38:46 +0100 Subject: [PATCH 08/31] fix for handling raw files in subdirectories --- src/pynxtools/nomad/parser.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index dd75c96a0..6298a0167 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -484,7 +484,13 @@ def parse( self._logger = logger if logger else get_logger(__name__) self._clear_class_refs() - *_, self.nxs_fname = mainfile.rsplit("/", 1) + mf = mainfile.split("/") + # if filename does not follow the pattern + # .volumes/fs/////[subdirs?]/ + if len(mf) < 7: + 
self.nxs_fname = mainfile + else: + self.nxs_fname = "/".join(mf[6:]) nexus_helper = HandleNexus(logger, mainfile) nexus_helper.process_nexus_master_file(self.__nexus_populate) From 8f1a0b4fbc4478a98ce2c4e7691cb167f9db0ce2 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Wed, 29 Jan 2025 18:39:38 +0100 Subject: [PATCH 09/31] use references in steps, and results in NexusMeasurement --- src/pynxtools/nomad/schema.py | 65 ++++++++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 17 deletions(-) diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 8aaf5c659..98dbb6c82 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -33,8 +33,9 @@ try: from nomad import utils from nomad.datamodel import EntryArchive, EntryMetadata - from nomad.datamodel.data import EntryData, Schema + from nomad.datamodel.data import ArchiveSection, EntryData, Schema from nomad.datamodel.metainfo import basesections + from nomad.datamodel.metainfo.annotations import ELNAnnotation from nomad.datamodel.metainfo.basesections import ( ActivityResult, ActivityStep, @@ -101,14 +102,37 @@ __logger = get_logger(__name__) + +class NexusActivityStep(ActivityStep): + reference = Quantity( + type=ArchiveSection, + description="A reference to a NeXus Activity Step.", + a_eln=ELNAnnotation( + component="ReferenceEditQuantity", + label="section reference", + ), + ) + + +class NexusActivityResult(ActivityResult): + reference = Quantity( + type=ArchiveSection, + description="A reference to a NeXus Activity Result.", + a_eln=ELNAnnotation( + component="ReferenceEditQuantity", + label="section reference", + ), + ) + + __BASESECTIONS_MAP: Dict[str, Any] = { "NXfabrication": [basesections.Instrument], "NXsample": [CompositeSystem], "NXsample_component": [Component], "NXidentifier": [EntityReference], - "NXentry": [ActivityStep], - "NXprocess": [ActivityStep], - "NXdata": [ActivityResult], + "NXentry": [NexusActivityStep], + "NXprocess": [NexusActivityStep], + "NXdata": [NexusActivityResult], # "object": BaseSection, } @@ -121,23 +145,21 @@ def normalize(self, archive, logger): raise AttributeError() self.steps = [] for entry in app_entry: - sec_c = entry.m_copy() - self.steps.append(sec_c) + ref = NexusActivityStep(name=entry.name, reference=entry) + self.steps.append(ref) for sec in entry.m_all_contents(): if isinstance(sec, ActivityStep): - sec_c = sec.m_copy() - self.steps.append(sec_c) + ref = NexusActivityStep(name=sec.name, reference=sec) + self.steps.append(ref) elif isinstance(sec, basesections.Instrument): - ref = InstrumentReference(name=sec.name) - ref.reference = sec + ref = InstrumentReference(name=sec.name, reference=sec) self.instruments.append(ref) elif isinstance(sec, CompositeSystem): - ref = CompositeSystemReference(name=sec.name) - ref.reference = sec + ref = CompositeSystemReference(name=sec.name, reference=sec) self.samples.append(ref) elif isinstance(sec, ActivityResult): - sec_c = sec.m_copy() - self.results.append(sec_c) + ref = NexusActivityResult(name=sec.name, reference=sec) + self.results.append(ref) if self.m_def.name == "Root": self.method = "Generic Experiment" else: @@ -158,7 +180,7 @@ def normalize(self, archive, logger): act_array = archive.workflow2.tasks existing_items = {(task.name, task.section) for task in act_array} new_items = [ - item.to_task() + item.reference.to_task() for item in self.steps if (item.name, item) not in existing_items ] @@ -177,9 +199,9 @@ def normalize(self, archive, logger): act_array = archive.workflow2.outputs 
existing_items = {(link.name, link.section) for link in act_array} new_items = [ - Link(name=item.name, section=item) + Link(name=item.name, section=item.reference) for item in self.results - if (item.name, item) not in existing_items + if (item.name, item.reference) not in existing_items ] act_array.extend(new_items) @@ -945,6 +967,8 @@ def init_nexus_metainfo(): # pass nexus_metainfo_package = __create_package_from_nxdl_directories() nexus_metainfo_package.section_definitions.append(NexusMeasurement.m_def) + nexus_metainfo_package.section_definitions.append(NexusActivityStep.m_def) + nexus_metainfo_package.section_definitions.append(NexusActivityResult.m_def) # We need to initialize the metainfo definitions. This is usually done automatically, # when the metainfo schema is defined though MSection Python classes. @@ -983,6 +1007,13 @@ def normalize_fabrication(self, archive, logger): current_cls = __section_definitions[ __rename_nx_for_nomad("NXfabrication") ].section_cls + self.name = ( + self.__dict__["nx_name"] + + " (" + + ((self.vendor__field + " / ") if self.vendor__field else "") + + (self.model__field if self.model__field else "") + + ")" + ) super(current_cls, self).normalize(archive, logger) From de2e94a90d228da91e923d4c537ee4ac68807b09 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Thu, 30 Jan 2025 20:48:42 +0100 Subject: [PATCH 10/31] make nexus attributes searchable by importing them to NOMAD as Quantities --- src/pynxtools/nomad/parser.py | 39 ++++++++++++++++++++++------------- src/pynxtools/nomad/schema.py | 24 +++++++++++++++------ src/pynxtools/nomad/utils.py | 3 +-- 3 files changed, 44 insertions(+), 22 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 6298a0167..138db791e 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -202,12 +202,22 @@ def _populate_data( attr_value = attr_value[0] # so values of non-scalar attribute will not end up in metainfo! 
- attr_name = attr_name + "__attribute" current = _to_section(attr_name, nx_def, nx_attr, current, self.nx_root) + attribute = attr_value + # TODO: get unit from attribute _units try: if nx_root or nx_parent.tag.endswith("group"): - current.m_set_section_attribute(attr_name, attr_value) + attribute_name = "___" + attr_name + metainfo_def = resolve_variadic_name( + current.m_def.all_properties, attribute_name + ) + if metainfo_def.use_full_storage: + attribute = MQuantity.wrap(attribute, attribute_name) + current.m_set(metainfo_def, attribute) + # if attributes are set before setting the quantity, a bug can cause them being set under a wrong variadic name + attribute.m_set_attribute("m_nx_data_path", hdf_node.name) + attribute.m_set_attribute("m_nx_data_file", self.nxs_fname) else: parent_html_name = nx_path[-2].get("name") @@ -216,25 +226,26 @@ def _populate_data( metainfo_def = None try: + attribute_name = parent_html_name + "___" + attr_name metainfo_def = resolve_variadic_name( - current.m_def.all_properties, parent_field_name + current.m_def.all_properties, attribute_name + ) + data_instance_name = ( + hdf_node.name.split("/")[-1] + "___" + attr_name ) + if metainfo_def.use_full_storage: + attribute = MQuantity.wrap( + attribute, data_instance_name + ) except ValueError as exc: self._logger.warning( - f"{current.m_def} has no suitable property for {parent_field_name}", + f"{current.m_def} has no suitable property for {parent_field_name} and {attr_name} as {attribute_name}", target_name=attr_name, exc_info=exc, ) - if parent_field_name in current.__dict__: - quantity = current.__dict__[parent_field_name] - if isinstance(quantity, dict): - quantity = quantity[parent_instance_name] - else: - quantity = None - raise Warning( - "setting attribute attempt before creating quantity" - ) - quantity.m_set_attribute(attr_name, attr_value) + current.m_set(metainfo_def, attribute) + attribute.m_set_attribute("m_nx_data_path", hdf_node.name) + attribute.m_set_attribute("m_nx_data_file", self.nxs_fname) except Exception as e: self._logger.warning( "error while setting attribute", diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 98dbb6c82..e2e3f1111 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -468,16 +468,19 @@ def __add_common_properties(xml_node: ET.Element, definition: Definition): definition.more["nx_optional"] = __if_base(xml_node) -def __create_attributes(xml_node: ET.Element, definition: Union[Section, Quantity]): +def __create_attributes( + xml_node: ET.Element, definition: Union[Section, Quantity], field: Quantity = None +): """ Add all attributes in the given nexus XML node to the given - Quantity or SubSection using the Attribute class (new mechanism). + Quantity or SubSection using a specially named Quantity class. 
todo: account for more attributes of attribute, e.g., default, minOccurs """ for attribute in xml_node.findall("nx:attribute", __XML_NAMESPACES): name = __rename_nx_for_nomad(attribute.get("name"), is_attribute=True) + shape: list = [] nx_enum = __get_enumeration(attribute) if nx_enum: nx_type = nx_enum @@ -496,8 +499,17 @@ def __create_attributes(xml_node: ET.Element, definition: Union[Section, Quantit else: nx_shape = [] - m_attribute = Attribute( - name=name, variable=__if_template(name), shape=nx_shape, type=nx_type + a_name = (field.more["nx_name"] if field else "") + "___" + name + m_attribute = Quantity( + name=a_name, + variable=__if_template(name) + or (__if_template(field.more["nx_name"]) if field else False), + shape=shape, + type=nx_type, + flexible_unit=True, + ) + m_attribute.more.update( + dict(nx_kind="attribute") # , nx_type=nx_type, nx_shape=nx_shape) ) for name, value in attribute.items(): @@ -505,7 +517,7 @@ def __create_attributes(xml_node: ET.Element, definition: Union[Section, Quantit __add_common_properties(attribute, m_attribute) - definition.attributes.append(m_attribute) + definition.quantities.append(m_attribute) def __add_additional_attributes(definition: Definition): @@ -637,7 +649,7 @@ def __create_field(xml_node: ET.Element, container: Section) -> Quantity: container.quantities.append(value_quantity) - __create_attributes(xml_node, value_quantity) + __create_attributes(xml_node, container, value_quantity) return value_quantity diff --git a/src/pynxtools/nomad/utils.py b/src/pynxtools/nomad/utils.py index 794a94e60..30916ec1d 100644 --- a/src/pynxtools/nomad/utils.py +++ b/src/pynxtools/nomad/utils.py @@ -79,6 +79,5 @@ def __rename_nx_for_nomad( elif is_field: name += "__field" elif is_attribute: - name += "__attribute" - + pass return name From 9c4a36126bb7cdb28c02f27dcf063416181eb59b Mon Sep 17 00:00:00 2001 From: GinzburgLev Date: Tue, 4 Feb 2025 10:42:17 +0100 Subject: [PATCH 11/31] temporary fix for boolean array as signals or axes - show first element as the field value --- src/pynxtools/nomad/parser.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 138db791e..dfc90a995 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -129,6 +129,9 @@ def _get_value(hdf_node): hdf_value = hdf_node[...] if str(hdf_value.dtype) == "bool": + if isinstance(hdf_value, (list, tuple, np.ndarray)): + # temporary solution for boolean arrays + return bool(hdf_value[0]) return bool(hdf_value) if hdf_value.dtype.kind in "iufc": return hdf_value From 2c4e3f66ca88329a3f66065bed379cf0cc97ba86 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Tue, 4 Feb 2025 18:34:22 +0100 Subject: [PATCH 12/31] handling arrays of strings --- src/pynxtools/nomad/parser.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index dfc90a995..0aa7b0c96 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -136,7 +136,7 @@ def _get_value(hdf_node): if hdf_value.dtype.kind in "iufc": return hdf_value if len(hdf_value.shape) > 0: - return hdf_value.astype(str) + return str([i for i in hdf_value.astype(str)]) return hdf_node[()].decode() @@ -203,6 +203,8 @@ def _populate_data( attr_value = attr_value.tolist() if len(attr_value) == 1: attr_value = attr_value[0] + else: + attr_value = str(attr_value) # so values of non-scalar attribute will not end up in metainfo! 
current = _to_section(attr_name, nx_def, nx_attr, current, self.nx_root) From 4aabf3beb79ca68422805a36649a7791ba15e589 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Tue, 4 Feb 2025 18:47:18 +0100 Subject: [PATCH 13/31] handling attribute if it is an array of numbers --- src/pynxtools/nomad/parser.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 0aa7b0c96..b0aaed7f8 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -200,12 +200,11 @@ def _populate_data( attr_value = hdf_node.attrs[attr_name] if not isinstance(attr_value, str): if isinstance(attr_value, np.ndarray): - attr_value = attr_value.tolist() - if len(attr_value) == 1: - attr_value = attr_value[0] + attr_list = attr_value.tolist() + if len(attr_list) == 1 or attr_value.dtype.kind in "iufc": + attr_value = attr_list[0] else: - attr_value = str(attr_value) - # so values of non-scalar attribute will not end up in metainfo! + attr_value = str(attr_list) current = _to_section(attr_name, nx_def, nx_attr, current, self.nx_root) From 2d7106b9d099ea490f9044556ca2e415283202e4 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Tue, 4 Feb 2025 19:39:10 +0100 Subject: [PATCH 14/31] fix for handling bool (arrays) coming from hdf5 --- src/pynxtools/nomad/parser.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index b0aaed7f8..4b9bebe69 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -129,9 +129,8 @@ def _get_value(hdf_node): hdf_value = hdf_node[...] if str(hdf_value.dtype) == "bool": - if isinstance(hdf_value, (list, tuple, np.ndarray)): - # temporary solution for boolean arrays - return bool(hdf_value[0]) + if len(hdf_value.shape) > 0: + return bool(hdf_value.tolist()[0]) return bool(hdf_value) if hdf_value.dtype.kind in "iufc": return hdf_value From dfffd21c9fb9acf78eae7ed523bbe5e362e3c0a5 Mon Sep 17 00:00:00 2001 From: Laurenz Rettig <53396064+rettigl@users.noreply.github.com> Date: Wed, 5 Feb 2025 17:44:00 +0100 Subject: [PATCH 15/31] add array size and ndim as attributes (#537) --- src/pynxtools/nomad/parser.py | 18 ++++++++++-------- src/pynxtools/nomad/schema.py | 19 ++++++++++++------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 4b9bebe69..14971899f 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -272,14 +272,14 @@ def _populate_data( if isinstance(field, np.ndarray) and field.size > 1: mask = np.isfinite(field) if np.any(mask): - field_stats = np.array( - [ - np.mean(field[mask]), - np.var(field[mask]), - np.min(field[mask]), - np.max(field[mask]), - ] - ) + field_stats = [ + np.mean(field[mask]), + np.var(field[mask]), + np.min(field[mask]), + np.max(field[mask]), + np.size(field), + np.ndim(field), + ] field = field_stats[0] if not np.isfinite(field): self._logger.warning( @@ -330,6 +330,8 @@ def _populate_data( field.m_set_attribute("nx_data_var", field_stats[1]) field.m_set_attribute("nx_data_min", field_stats[2]) field.m_set_attribute("nx_data_max", field_stats[3]) + field.m_set_attribute("nx_data_size", field_stats[4]) + field.m_set_attribute("nx_data_ndim", field_stats[5]) except Exception as e: self._logger.warning( "error while setting field", diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index e2e3f1111..cbb9a0a5f 100644 --- 
a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -553,12 +553,17 @@ def __add_additional_attributes(definition: Definition): ): return - for nx_array_attr in [ - "nx_data_mean", - "nx_data_var", - "nx_data_min", - "nx_data_max", - ]: + for nx_array_attr, dtype in zip( + [ + "nx_data_mean", + "nx_data_var", + "nx_data_min", + "nx_data_max", + "nx_data_size", + "nx_data_ndim", + ], + [np.float64, np.float64, np.float64, np.float64, np.int32, np.int32], + ): if nx_array_attr in definition.all_attributes: continue definition.attributes.append( @@ -566,7 +571,7 @@ def __add_additional_attributes(definition: Definition): name=nx_array_attr, variable=False, shape=[], - type=np.float64, + type=dtype, description="This is a NeXus template property. " "This attribute holds specific statistics of the NeXus data array.", ) From e2371c44cda455cd3cb7c7d092e50144383cd246 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Thu, 6 Feb 2025 18:38:27 +0100 Subject: [PATCH 16/31] field statistics are now searchable quantities themselves --- src/pynxtools/nomad/parser.py | 29 ++++++++----- src/pynxtools/nomad/schema.py | 79 ++++++++++++++++++++--------------- src/pynxtools/nomad/utils.py | 8 ++++ 3 files changed, 72 insertions(+), 44 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 14971899f..1d3725fa0 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -40,7 +40,7 @@ import pynxtools.nomad.schema as nexus_schema from pynxtools.nexus.nexus import HandleNexus -from pynxtools.nomad.utils import __REPLACEMENT_FOR_NX +from pynxtools.nomad.utils import __REPLACEMENT_FOR_NX, get_quantity_base_name from pynxtools.nomad.utils import __rename_nx_for_nomad as rename_nx_for_nomad @@ -323,15 +323,24 @@ def _populate_data( field.m_set_attribute("m_nx_data_path", hdf_node.name) field.m_set_attribute("m_nx_data_file", self.nxs_fname) if field_stats is not None: - # TODO _add_additional_attributes function has created these nx_data_* - # attributes speculatively already so if the field_stats is None - # this will cause unpopulated attributes in the GUI - field.m_set_attribute("nx_data_mean", field_stats[0]) - field.m_set_attribute("nx_data_var", field_stats[1]) - field.m_set_attribute("nx_data_min", field_stats[2]) - field.m_set_attribute("nx_data_max", field_stats[3]) - field.m_set_attribute("nx_data_size", field_stats[4]) - field.m_set_attribute("nx_data_ndim", field_stats[5]) + concept_basename = get_quantity_base_name(field.name) + instancename = get_quantity_base_name(data_instance_name) + for suffix, stat in zip( + [ + "__mean", + "__var", + "__min", + "__max", + "__size", + "__dim", + ], + field_stats[1:], + ): + stat_metainfo_def = resolve_variadic_name( + current.m_def.all_properties, concept_basename + suffix + ) + stat = MQuantity.wrap(stat, instancename + suffix) + current.m_set(stat_metainfo_def, stat) except Exception as e: self._logger.warning( "error while setting field", diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index cbb9a0a5f..b0333546b 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -82,7 +82,11 @@ from pynxtools import get_definitions_url from pynxtools.definitions.dev_tools.utils.nxdl_utils import get_nexus_definitions_path -from pynxtools.nomad.utils import __REPLACEMENT_FOR_NX, __rename_nx_for_nomad +from pynxtools.nomad.utils import ( + __REPLACEMENT_FOR_NX, + __rename_nx_for_nomad, + get_quantity_base_name, +) # __URL_REGEXP from # 
https://stackoverflow.com/questions/3809401/what-is-a-good-regular-expression-to-match-a-url @@ -516,11 +520,47 @@ def __create_attributes( m_attribute.more[f"nx_{name}"] = value __add_common_properties(attribute, m_attribute) + # TODO: decide if stats should be made searchable for attributes, too + # __add_quantity_stats(definition,m_attribute) definition.quantities.append(m_attribute) -def __add_additional_attributes(definition: Definition): +def __add_quantity_stats(container: Section, quantity: Quantity): + # TODO We should also check the shape of the quantity and the datatype as + # the statistics are always mapping on float64 even if quantity values are ints + if not quantity.name.endswith("__field") or ( + quantity.type not in [np.float64, np.int64, np.uint64] + and not isinstance(quantity.type, Number) + ): + return + basename = get_quantity_base_name(quantity.name) + print(quantity.name, basename) + for suffix, dtype in zip( + [ + "__mean", + "__var", + "__min", + "__max", + "__size", + "__ndim", + ], + [np.float64, np.float64, None, None, np.int32, np.int32], + ): + print(basename + suffix) + container.quantities.append( + Quantity( + name=basename + suffix, + variable=quantity.variable, + shape=[], + type=dtype if dtype else quantity.type, + description="This is a NeXus template property. " + "This quantity holds specific statistics of the NeXus data array.", + ) + ) + + +def __add_additional_attributes(definition: Definition, container: Section): if "m_nx_data_path" not in definition.attributes: definition.attributes.append( Attribute( @@ -546,36 +586,7 @@ def __add_additional_attributes(definition: Definition): ) if isinstance(definition, Quantity): - # TODO We should also check the shape of the quantity and the datatype as - # the statistics are always mapping on float64 even if quantity values are ints - if definition.type not in [np.float64, np.int64, np.uint64] and not isinstance( - definition.type, Number - ): - return - - for nx_array_attr, dtype in zip( - [ - "nx_data_mean", - "nx_data_var", - "nx_data_min", - "nx_data_max", - "nx_data_size", - "nx_data_ndim", - ], - [np.float64, np.float64, np.float64, np.float64, np.int32, np.int32], - ): - if nx_array_attr in definition.all_attributes: - continue - definition.attributes.append( - Attribute( - name=nx_array_attr, - variable=False, - shape=[], - type=dtype, - description="This is a NeXus template property. 
" - "This attribute holds specific statistics of the NeXus data array.", - ) - ) + __add_quantity_stats(container, definition) def __create_field(xml_node: ET.Element, container: Section) -> Quantity: @@ -1005,9 +1016,9 @@ def init_nexus_metainfo(): for section in sections: if not (str(section).startswith("pynxtools.")): continue - __add_additional_attributes(section) + __add_additional_attributes(section, None) for quantity in section.quantities: - __add_additional_attributes(quantity) + __add_additional_attributes(quantity, section) # We skip the Python code generation for now and offer Python classes as variables # TO DO not necessary right now, could also be done case-by-case by the nexus parser diff --git a/src/pynxtools/nomad/utils.py b/src/pynxtools/nomad/utils.py index 30916ec1d..8ea64ae3d 100644 --- a/src/pynxtools/nomad/utils.py +++ b/src/pynxtools/nomad/utils.py @@ -81,3 +81,11 @@ def __rename_nx_for_nomad( elif is_attribute: pass return name + + +def get_quantity_base_name(quantity_name): + return ( + quantity_name[:-7] + if quantity_name.endswith("__field") and quantity_name[-8] != "_" + else quantity_name + ) From 35080798825e0573426b015904206ee2d556ebf3 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Thu, 6 Feb 2025 18:59:14 +0100 Subject: [PATCH 17/31] NexusBaseSection for registering NeXUs Group instance names as searchable quantity --- src/pynxtools/nomad/schema.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index b0333546b..06e6aea0a 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -107,6 +107,13 @@ __logger = get_logger(__name__) +class NexusBaseSection(BaseSection): + def normalize(self, archive, logger): + if self.__dict__["nx_name"]: + self.name = self.__dict__["nx_name"] + super().normalize(archive, logger) + + class NexusActivityStep(ActivityStep): reference = Quantity( type=ArchiveSection, @@ -535,7 +542,6 @@ def __add_quantity_stats(container: Section, quantity: Quantity): ): return basename = get_quantity_base_name(quantity.name) - print(quantity.name, basename) for suffix, dtype in zip( [ "__mean", @@ -547,7 +553,6 @@ def __add_quantity_stats(container: Section, quantity: Quantity): ], [np.float64, np.float64, None, None, np.int32, np.int32], ): - print(basename + suffix) container.quantities.append( Quantity( name=basename + suffix, @@ -796,7 +801,7 @@ def __create_class_section(xml_node: ET.Element) -> Section: [NexusMeasurement] if xml_attrs["extends"] == "NXobject" else [] ) else: - nomad_base_sec_cls = __BASESECTIONS_MAP.get(nx_name, [BaseSection]) + nomad_base_sec_cls = __BASESECTIONS_MAP.get(nx_name, [NexusBaseSection]) nx_name = __rename_nx_for_nomad(nx_name) class_section: Section = __to_section( @@ -997,6 +1002,7 @@ def init_nexus_metainfo(): nexus_metainfo_package.section_definitions.append(NexusMeasurement.m_def) nexus_metainfo_package.section_definitions.append(NexusActivityStep.m_def) nexus_metainfo_package.section_definitions.append(NexusActivityResult.m_def) + nexus_metainfo_package.section_definitions.append(NexusBaseSection.m_def) # We need to initialize the metainfo definitions. This is usually done automatically, # when the metainfo schema is defined though MSection Python classes. 
From ebafcf71c8708bca4350e5b6335a789f58f9aa08 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Fri, 7 Feb 2025 12:07:37 +0100 Subject: [PATCH 18/31] fix for registering stats --- src/pynxtools/nomad/parser.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 1d3725fa0..faab54ba7 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -40,8 +40,9 @@ import pynxtools.nomad.schema as nexus_schema from pynxtools.nexus.nexus import HandleNexus -from pynxtools.nomad.utils import __REPLACEMENT_FOR_NX, get_quantity_base_name +from pynxtools.nomad.utils import __REPLACEMENT_FOR_NX from pynxtools.nomad.utils import __rename_nx_for_nomad as rename_nx_for_nomad +from pynxtools.nomad.utils import get_quantity_base_name def _to_group_name(nx_node: ET.Element): @@ -332,9 +333,9 @@ def _populate_data( "__min", "__max", "__size", - "__dim", + "__ndim", ], - field_stats[1:], + field_stats, ): stat_metainfo_def = resolve_variadic_name( current.m_def.all_properties, concept_basename + suffix From 27421b6d976cf7481a27f202f63fb48721f402f5 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Wed, 12 Feb 2025 11:06:19 +0100 Subject: [PATCH 19/31] searcable __name for variadic quantities --- src/pynxtools/nomad/parser.py | 19 +++++++++++++++++-- src/pynxtools/nomad/schema.py | 31 ++++++++++++++++++++++++------- 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index faab54ba7..97cebe12c 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -266,6 +266,7 @@ def _populate_data( metainfo_def = resolve_variadic_name( current.m_def.all_properties, field_name ) + isvariadic = any(char.isupper() for char in metainfo_def.more["nx_name"]) # for data arrays only statistics if not all values NINF, Inf, or NaN field_stats = None @@ -309,6 +310,9 @@ def _populate_data( else: pint_unit = ureg.parse_units("1") field = ureg.Quantity(field, pint_unit) + if field_stats is not None: + for i in range(4): + field_stats[i] = ureg.Quantity(field_stats[i], pint_unit) except (ValueError, UndefinedUnitError): pass @@ -323,25 +327,36 @@ def _populate_data( current.m_set(metainfo_def, field) field.m_set_attribute("m_nx_data_path", hdf_node.name) field.m_set_attribute("m_nx_data_file", self.nxs_fname) + if isvariadic: + concept_basename = get_quantity_base_name(field.name) + instancename = get_quantity_base_name(data_instance_name) + name_metainfo_def = resolve_variadic_name( + current.m_def.all_properties, concept_basename + "__name" + ) + name_value = MQuantity.wrap(instancename, instancename + "__name") + current.m_set(name_metainfo_def, name_value) + name_value.m_set_attribute("m_nx_data_path", hdf_node.name) + name_value.m_set_attribute("m_nx_data_file", self.nxs_fname) if field_stats is not None: concept_basename = get_quantity_base_name(field.name) instancename = get_quantity_base_name(data_instance_name) for suffix, stat in zip( [ - "__mean", "__var", "__min", "__max", "__size", "__ndim", ], - field_stats, + field_stats[1:], ): stat_metainfo_def = resolve_variadic_name( current.m_def.all_properties, concept_basename + suffix ) stat = MQuantity.wrap(stat, instancename + suffix) current.m_set(stat_metainfo_def, stat) + stat.m_set_attribute("m_nx_data_path", hdf_node.name) + stat.m_set_attribute("m_nx_data_file", self.nxs_fname) except Exception as e: self._logger.warning( "error while setting field", diff --git 
a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 06e6aea0a..5b77d4b38 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -527,7 +527,7 @@ def __create_attributes( m_attribute.more[f"nx_{name}"] = value __add_common_properties(attribute, m_attribute) - # TODO: decide if stats should be made searchable for attributes, too + # TODO: decide if stats/instancename should be made searchable for attributes, too # __add_quantity_stats(definition,m_attribute) definition.quantities.append(m_attribute) @@ -536,22 +536,39 @@ def __create_attributes( def __add_quantity_stats(container: Section, quantity: Quantity): # TODO We should also check the shape of the quantity and the datatype as # the statistics are always mapping on float64 even if quantity values are ints - if not quantity.name.endswith("__field") or ( - quantity.type not in [np.float64, np.int64, np.uint64] - and not isinstance(quantity.type, Number) - ): + if not quantity.name.endswith("__field"): + return + isvariadic = any(char.isupper() for char in quantity.more["nx_name"]) + notnumber = quantity.type not in [ + np.float64, + np.int64, + np.uint64, + ] and not isinstance(quantity.type, Number) + if notnumber or not isvariadic: return basename = get_quantity_base_name(quantity.name) + if isvariadic: + container.quantities.append( + Quantity( + name=basename + "__name", + variable=quantity.variable, + shape=[], + type=str, + description="This is a NeXus template property. " + "This quantity holds the instance name of a NeXus Field.", + ) + ) + if notnumber: + return for suffix, dtype in zip( [ - "__mean", "__var", "__min", "__max", "__size", "__ndim", ], - [np.float64, np.float64, None, None, np.int32, np.int32], + [np.float64, None, None, np.int32, np.int32], ): container.quantities.append( Quantity( From cb4fa29f805a7a46c085f63e8af830fcd5264b13 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Thu, 13 Feb 2025 12:46:13 +0100 Subject: [PATCH 20/31] bringing statistic definitions to a common place --- src/pynxtools/nomad/parser.py | 21 +++++++-------------- src/pynxtools/nomad/schema.py | 11 +++-------- src/pynxtools/nomad/utils.py | 8 ++++++++ 3 files changed, 18 insertions(+), 22 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 97cebe12c..83fc9c895 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -40,7 +40,7 @@ import pynxtools.nomad.schema as nexus_schema from pynxtools.nexus.nexus import HandleNexus -from pynxtools.nomad.utils import __REPLACEMENT_FOR_NX +from pynxtools.nomad.utils import __FIELD_STATISTICS, __REPLACEMENT_FOR_NX from pynxtools.nomad.utils import __rename_nx_for_nomad as rename_nx_for_nomad from pynxtools.nomad.utils import get_quantity_base_name @@ -275,12 +275,11 @@ def _populate_data( mask = np.isfinite(field) if np.any(mask): field_stats = [ - np.mean(field[mask]), - np.var(field[mask]), - np.min(field[mask]), - np.max(field[mask]), - np.size(field), - np.ndim(field), + func(field[mask] if ismask else field) + for func, ismask in zip( + __FIELD_STATISTICS["function"], + __FIELD_STATISTICS["mask"], + ) ] field = field_stats[0] if not np.isfinite(field): @@ -341,13 +340,7 @@ def _populate_data( concept_basename = get_quantity_base_name(field.name) instancename = get_quantity_base_name(data_instance_name) for suffix, stat in zip( - [ - "__var", - "__min", - "__max", - "__size", - "__ndim", - ], + __FIELD_STATISTICS["suffix"][1:], field_stats[1:], ): stat_metainfo_def = resolve_variadic_name( 
diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 5b77d4b38..c990a37f0 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -83,6 +83,7 @@ from pynxtools import get_definitions_url from pynxtools.definitions.dev_tools.utils.nxdl_utils import get_nexus_definitions_path from pynxtools.nomad.utils import ( + __FIELD_STATISTICS, __REPLACEMENT_FOR_NX, __rename_nx_for_nomad, get_quantity_base_name, @@ -561,14 +562,8 @@ def __add_quantity_stats(container: Section, quantity: Quantity): if notnumber: return for suffix, dtype in zip( - [ - "__var", - "__min", - "__max", - "__size", - "__ndim", - ], - [np.float64, None, None, np.int32, np.int32], + __FIELD_STATISTICS["suffix"][1:], + __FIELD_STATISTICS["type"][1:], ): container.quantities.append( Quantity( diff --git a/src/pynxtools/nomad/utils.py b/src/pynxtools/nomad/utils.py index 8ea64ae3d..79b1a3aae 100644 --- a/src/pynxtools/nomad/utils.py +++ b/src/pynxtools/nomad/utils.py @@ -89,3 +89,11 @@ def get_quantity_base_name(quantity_name): if quantity_name.endswith("__field") and quantity_name[-8] != "_" else quantity_name ) + + +__FIELD_STATISTICS = { + "suffix": ["__mean", "__var", "__min", "__max", "__size", "__ndim"], + "function": [np.mean, np.var, np.min, np.max, np.size, np.ndim], + "type": [np.float64, np.float64, None, None, np.int32, np.int32], + "mask": [True, True, True, True, False, False], +} From f6548e9f9a2ed4aff82f16e7460cc04117a2fddd Mon Sep 17 00:00:00 2001 From: sanbrock <45483558+sanbrock@users.noreply.github.com> Date: Thu, 13 Feb 2025 12:56:37 +0100 Subject: [PATCH 21/31] Update src/pynxtools/nomad/parser.py Co-authored-by: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> --- src/pynxtools/nomad/parser.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 83fc9c895..eb4dbfb2c 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -518,13 +518,9 @@ def parse( self._logger = logger if logger else get_logger(__name__) self._clear_class_refs() - mf = mainfile.split("/") # if filename does not follow the pattern # .volumes/fs/////[subdirs?]/ - if len(mf) < 7: - self.nxs_fname = mainfile - else: - self.nxs_fname = "/".join(mf[6:]) + self.nxs_fname = "/".join(mainfile.split("/")[6:]) or mainfile nexus_helper = HandleNexus(logger, mainfile) nexus_helper.process_nexus_master_file(self.__nexus_populate) From 1231d170e31b93b431754f0b6abf365ba1f5396f Mon Sep 17 00:00:00 2001 From: sanbrock <45483558+sanbrock@users.noreply.github.com> Date: Thu, 13 Feb 2025 14:05:49 +0100 Subject: [PATCH 22/31] Update src/pynxtools/nomad/schema.py Co-authored-by: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> --- src/pynxtools/nomad/schema.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index c990a37f0..5b7f39adf 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -955,18 +955,15 @@ def __create_package_from_nxdl_directories() -> Package: package.section_definitions.append(nexus_sections[section_name]) for section in sections: package.section_definitions.append(section) - if section.nx_category == "application": - nexus_sections["_Applications"].sub_sections.append( - SubSection(section_def=section, name=section.name) - ) - elif section.nx_category == "base" and section.nx_name == "NXroot": - 
nexus_sections["_Applications"].sub_sections.append( - SubSection(section_def=section, name=section.name) - ) + if section.nx_category == "application" or section.nx_name == "NXroot": + key = "_Applications" elif section.nx_category == "base": - nexus_sections["_BaseSections"].sub_sections.append( - SubSection(section_def=section, name=section.name) - ) + key = "_BaseSections" + else: + key = None + + if key: + nexus_sections[key].sub_sections.append(SubSection(section_def=section, name=section.name)) for section_name, section in __section_definitions.items(): if "__" in section_name: package.section_definitions.append(section) From 5259fd28dc9ce835715c027c912899fa552f805e Mon Sep 17 00:00:00 2001 From: sanbrock Date: Thu, 13 Feb 2025 14:29:38 +0100 Subject: [PATCH 23/31] use mapping instead of if/elif-s --- src/pynxtools/nomad/schema.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 5b7f39adf..6eb83c112 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -159,19 +159,17 @@ def normalize(self, archive, logger): for entry in app_entry: ref = NexusActivityStep(name=entry.name, reference=entry) self.steps.append(ref) + mapping = { + ActivityStep: (NexusActivityStep, self.steps), + basesections.Instrument: (InstrumentReference, self.instruments), + CompositeSystem: (CompositeSystemReference, self.samples), + ActivityResult: (NexusActivityResult, self.results), + } for sec in entry.m_all_contents(): - if isinstance(sec, ActivityStep): - ref = NexusActivityStep(name=sec.name, reference=sec) - self.steps.append(ref) - elif isinstance(sec, basesections.Instrument): - ref = InstrumentReference(name=sec.name, reference=sec) - self.instruments.append(ref) - elif isinstance(sec, CompositeSystem): - ref = CompositeSystemReference(name=sec.name, reference=sec) - self.samples.append(ref) - elif isinstance(sec, ActivityResult): - ref = NexusActivityResult(name=sec.name, reference=sec) - self.results.append(ref) + for cls, (ref_cls, collection) in mapping.items(): + if isinstance(sec, cls): + collection.append(ref_cls(name=sec.name, reference=sec)) + break if self.m_def.name == "Root": self.method = "Generic Experiment" else: @@ -963,7 +961,9 @@ def __create_package_from_nxdl_directories() -> Package: key = None if key: - nexus_sections[key].sub_sections.append(SubSection(section_def=section, name=section.name)) + nexus_sections[key].sub_sections.append( + SubSection(section_def=section, name=section.name) + ) for section_name, section in __section_definitions.items(): if "__" in section_name: package.section_definitions.append(section) From 3f3d09d51ac69cc2961e1c51146c8bc5b89f7c32 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Thu, 13 Feb 2025 15:19:59 +0100 Subject: [PATCH 24/31] manage std instead of var --- src/pynxtools/nomad/parser.py | 16 ++++++++++------ src/pynxtools/nomad/utils.py | 6 ++++-- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index eb4dbfb2c..8672d5a27 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -40,7 +40,8 @@ import pynxtools.nomad.schema as nexus_schema from pynxtools.nexus.nexus import HandleNexus -from pynxtools.nomad.utils import __FIELD_STATISTICS, __REPLACEMENT_FOR_NX +from pynxtools.nomad.utils import __FIELD_STATISTICS as FIELD_STATISTICS +from pynxtools.nomad.utils import __REPLACEMENT_FOR_NX from pynxtools.nomad.utils 
import __rename_nx_for_nomad as rename_nx_for_nomad from pynxtools.nomad.utils import get_quantity_base_name @@ -277,8 +278,8 @@ def _populate_data( field_stats = [ func(field[mask] if ismask else field) for func, ismask in zip( - __FIELD_STATISTICS["function"], - __FIELD_STATISTICS["mask"], + FIELD_STATISTICS["function"], + FIELD_STATISTICS["mask"], ) ] field = field_stats[0] @@ -310,8 +311,11 @@ def _populate_data( pint_unit = ureg.parse_units("1") field = ureg.Quantity(field, pint_unit) if field_stats is not None: - for i in range(4): - field_stats[i] = ureg.Quantity(field_stats[i], pint_unit) + for i in range(len(field_stats)): + if FIELD_STATISTICS["mask"][i]: + field_stats[i] = ureg.Quantity( + field_stats[i], pint_unit + ) except (ValueError, UndefinedUnitError): pass @@ -340,7 +344,7 @@ def _populate_data( concept_basename = get_quantity_base_name(field.name) instancename = get_quantity_base_name(data_instance_name) for suffix, stat in zip( - __FIELD_STATISTICS["suffix"][1:], + FIELD_STATISTICS["suffix"][1:], field_stats[1:], ): stat_metainfo_def = resolve_variadic_name( diff --git a/src/pynxtools/nomad/utils.py b/src/pynxtools/nomad/utils.py index 79b1a3aae..3e25da7c5 100644 --- a/src/pynxtools/nomad/utils.py +++ b/src/pynxtools/nomad/utils.py @@ -18,6 +18,8 @@ from typing import Optional +import numpy as np + __REPLACEMENT_FOR_NX = "" # This is a list of NeXus group names that are not allowed because they are defined as quantities in the BaseSection class. @@ -92,8 +94,8 @@ def get_quantity_base_name(quantity_name): __FIELD_STATISTICS = { - "suffix": ["__mean", "__var", "__min", "__max", "__size", "__ndim"], - "function": [np.mean, np.var, np.min, np.max, np.size, np.ndim], + "suffix": ["__mean", "__std", "__min", "__max", "__size", "__ndim"], + "function": [np.mean, np.std, np.min, np.max, np.size, np.ndim], "type": [np.float64, np.float64, None, None, np.int32, np.int32], "mask": [True, True, True, True, False, False], } From 81218f894a3d64c52fd4fc45293c5b167a1b583b Mon Sep 17 00:00:00 2001 From: sanbrock Date: Fri, 14 Feb 2025 13:07:23 +0100 Subject: [PATCH 25/31] fix for ganareting __name/stat quantities --- src/pynxtools/nomad/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 6eb83c112..cb39d9aa6 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -543,7 +543,7 @@ def __add_quantity_stats(container: Section, quantity: Quantity): np.int64, np.uint64, ] and not isinstance(quantity.type, Number) - if notnumber or not isvariadic: + if notnumber and not isvariadic: return basename = get_quantity_base_name(quantity.name) if isvariadic: From 1d6c2cc1d1603d55c3f60e37543538b309898ea2 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Fri, 14 Feb 2025 13:08:01 +0100 Subject: [PATCH 26/31] fix for handling attributes --- src/pynxtools/nomad/parser.py | 85 ++++++++++++++++------------------- 1 file changed, 38 insertions(+), 47 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 8672d5a27..1718251cd 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -27,7 +27,7 @@ from nomad.datamodel import EntryArchive, EntryMetadata from nomad.datamodel.data import EntryData from nomad.datamodel.results import Material, Results - from nomad.metainfo import MSection + from nomad.metainfo import MEnum, MSection from nomad.metainfo.util import MQuantity, MSubSectionList, resolve_variadic_name from nomad.parsing 
import MatchingParser from nomad.units import ureg @@ -199,61 +199,52 @@ def _populate_data( attr_name = nx_attr.get("name") # could be 1D array, float or int attr_value = hdf_node.attrs[attr_name] - if not isinstance(attr_value, str): - if isinstance(attr_value, np.ndarray): - attr_list = attr_value.tolist() - if len(attr_list) == 1 or attr_value.dtype.kind in "iufc": - attr_value = attr_list[0] - else: - attr_value = str(attr_list) - current = _to_section(attr_name, nx_def, nx_attr, current, self.nx_root) - - attribute = attr_value - # TODO: get unit from attribute _units try: if nx_root or nx_parent.tag.endswith("group"): - attribute_name = "___" + attr_name + parent_html_name = "" + parent_name = "" + parent_field_name = "" + else: + parent_html_name = nx_path[-2].get("name") + parent_name = hdf_node.name.split("/")[-1] + parent_field_name = parent_html_name + "__field" + attribute_name = parent_html_name + "___" + attr_name + data_instance_name = parent_name + "___" + attr_name + metainfo_def = None + try: metainfo_def = resolve_variadic_name( current.m_def.all_properties, attribute_name ) + attribute = attr_value + # TODO: get unit from attribute _units + if isinstance(metainfo_def.type, MEnum): + attribute = str(attr_value) + elif not isinstance(attr_value, str): + if isinstance(attr_value, np.ndarray): + attr_list = attr_value.tolist() + if ( + len(attr_list) == 1 + or attr_value.dtype.kind in "iufc" + ): + attribute = attr_list[0] + else: + attribute = str(attr_list) if metainfo_def.use_full_storage: - attribute = MQuantity.wrap(attribute, attribute_name) - current.m_set(metainfo_def, attribute) - # if attributes are set before setting the quantity, a bug can cause them being set under a wrong variadic name - attribute.m_set_attribute("m_nx_data_path", hdf_node.name) - attribute.m_set_attribute("m_nx_data_file", self.nxs_fname) - else: - parent_html_name = nx_path[-2].get("name") - - parent_instance_name = hdf_node.name.split("/")[-1] + "__field" - parent_field_name = parent_html_name + "__field" - - metainfo_def = None - try: - attribute_name = parent_html_name + "___" + attr_name - metainfo_def = resolve_variadic_name( - current.m_def.all_properties, attribute_name - ) - data_instance_name = ( - hdf_node.name.split("/")[-1] + "___" + attr_name - ) - if metainfo_def.use_full_storage: - attribute = MQuantity.wrap( - attribute, data_instance_name - ) - except ValueError as exc: - self._logger.warning( - f"{current.m_def} has no suitable property for {parent_field_name} and {attr_name} as {attribute_name}", - target_name=attr_name, - exc_info=exc, - ) - current.m_set(metainfo_def, attribute) - attribute.m_set_attribute("m_nx_data_path", hdf_node.name) - attribute.m_set_attribute("m_nx_data_file", self.nxs_fname) + attribute = MQuantity.wrap(attribute, data_instance_name) + except ValueError as exc: + self._logger.warning( + f"{current.m_def} has no suitable property for {parent_field_name} and {attr_name} as {attribute_name}", + target_name=attr_name, + exc_info=exc, + ) + current.m_set(metainfo_def, attribute) + # if attributes are set before setting the quantity, a bug can cause them being set under a wrong variadic name + attribute.m_set_attribute("m_nx_data_path", hdf_node.name) + attribute.m_set_attribute("m_nx_data_file", self.nxs_fname) except Exception as e: self._logger.warning( - "error while setting attribute", + f"error while setting attribute {data_instance_name} in {current.m_def} as {metainfo_def}", target_name=attr_name, exc_info=e, ) From 
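The reworked attribute handling in PATCH 26 above routes group- and field-level attributes through a single path built from parent___attribute names. The sketch below reproduces only the naming and value-coercion rules; NOMAD's resolve_variadic_name and MQuantity.wrap are deliberately left out, and the first-element collapse for numeric arrays mirrors the patch as written:

import numpy as np

def normalize_attr_value(attr_value, is_enum=False):
    """Value coercion as in the patched hunk: enums become strings,
    one-element or numeric arrays collapse to a scalar, other arrays
    are stringified."""
    if is_enum:
        return str(attr_value)
    if isinstance(attr_value, str):
        return attr_value
    if isinstance(attr_value, np.ndarray):
        attr_list = attr_value.tolist()
        if len(attr_list) == 1 or attr_value.dtype.kind in "iufc":
            return attr_list[0]
        return str(attr_list)
    return attr_value

def attribute_names(parent_html_name, parent_name, attr_name):
    """Schema-level and instance-level names used by the patch; for
    attributes directly on a group both parent parts are empty strings."""
    attribute_name = parent_html_name + "___" + attr_name
    data_instance_name = parent_name + "___" + attr_name
    return attribute_name, data_instance_name

assert normalize_attr_value(np.array([3.14])) == 3.14
assert normalize_attr_value(np.array(["a", "b"])) == "['a', 'b']"
assert attribute_names("", "", "axes") == ("___axes", "___axes")
assert attribute_names("AXISNAME", "energies", "units") == (
    "AXISNAME___units",
    "energies___units",
)

Because the parent parts are empty for group-level attributes, the resulting quantity names start with a triple underscore, which is why the later test_parsing.py change asserts on data.___axes.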
04bd30a02df085275884cc911ae95ceeaaf08eda Mon Sep 17 00:00:00 2001 From: sanbrock Date: Fri, 14 Feb 2025 13:08:45 +0100 Subject: [PATCH 27/31] ruffing --- src/pynxtools/dataconverter/helpers.py | 8 +++----- .../dataconverter/readers/example/reader.py | 18 ++++++------------ .../dataconverter/readers/json_map/reader.py | 2 +- src/pynxtools/nexus/nexus.py | 4 ++-- src/pynxtools/testing/nexus_conversion.py | 6 +++--- src/pynxtools/testing/nomad_example.py | 6 +++--- tests/dataconverter/test_helpers.py | 4 ++-- tests/nexus/test_nexus.py | 12 ++++++------ 8 files changed, 26 insertions(+), 34 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 768fdbed0..eb588d675 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -80,13 +80,11 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar if log_type == ValidationProblem.UnitWithoutDocumentation: logger.warning( - f"The unit, {path} = {value}, " - "is being written but has no documentation" + f"The unit, {path} = {value}, is being written but has no documentation" ) elif log_type == ValidationProblem.InvalidEnum: logger.warning( - f"The value at {path} should be on of the " - f"following strings: {value}" + f"The value at {path} should be on of the following strings: {value}" ) elif log_type == ValidationProblem.MissingRequiredGroup: logger.warning(f"The required group, {path}, hasn't been supplied.") @@ -344,7 +342,7 @@ def get_all_defined_required_children_for_elem(xml_element): list_of_children_to_add.add(f"{name_to_add}/@units") elif tag == "group": nxdlpath = ( - f'{xml_element.get("nxdlpath")}/{get_nxdl_name_from_elem(child)}' + f"{xml_element.get('nxdlpath')}/{get_nxdl_name_from_elem(child)}" ) nxdlbase = xml_element.get("nxdlbase") nx_name = nxdlbase[nxdlbase.rfind("/") + 1 : nxdlbase.rfind(".nxdl")] diff --git a/src/pynxtools/dataconverter/readers/example/reader.py b/src/pynxtools/dataconverter/readers/example/reader.py index 3e3fd09af..fefe37f5c 100644 --- a/src/pynxtools/dataconverter/readers/example/reader.py +++ b/src/pynxtools/dataconverter/readers/example/reader.py @@ -106,22 +106,16 @@ def read( # virtual datasets slicing my_path = str(f"{os.path.dirname(__file__)}/../../../data/") - template[("/ENTRY[entry]" "/test_virtual" "_dataset/sliced" "_dataset")] = { - "link": ( - f"{my_path}/xarray_saved_small_" "calibration.h5:/binned/BinnedData" - ), + template[("/ENTRY[entry]/test_virtual_dataset/sliced_dataset")] = { + "link": (f"{my_path}/xarray_saved_small_calibration.h5:/binned/BinnedData"), "shape": np.index_exp[:, 1, :, :], } - template[("/ENTRY[entry]" "/test_virtual" "_dataset/slic" "ed_dataset2")] = { - "link": ( - f"{my_path}/xarray_saved_small" "_calibration.h5:/binned/BinnedData" - ), + template[("/ENTRY[entry]/test_virtual_dataset/sliced_dataset2")] = { + "link": (f"{my_path}/xarray_saved_small_calibration.h5:/binned/BinnedData"), "shape": np.index_exp[:, :, :, 1], } - template[("/ENTRY[entry]" "/test_virtual" "_dataset/slic" "ed_dataset3")] = { - "link": ( - f"{my_path}/xarray_saved_small" "_calibration.h5:/binned/BinnedData" - ), + template[("/ENTRY[entry]/test_virtual_dataset/sliced_dataset3")] = { + "link": (f"{my_path}/xarray_saved_small_calibration.h5:/binned/BinnedData"), "shape": np.index_exp[:, :, :, 2:4], } diff --git a/src/pynxtools/dataconverter/readers/json_map/reader.py b/src/pynxtools/dataconverter/readers/json_map/reader.py index aa8664df3..7d0b2bd8d 100644 --- 
a/src/pynxtools/dataconverter/readers/json_map/reader.py +++ b/src/pynxtools/dataconverter/readers/json_map/reader.py @@ -56,7 +56,7 @@ def get_val_nested_keystring_from_dict(keystring, data): return data[current_key].values if isinstance(data[current_key], xarray.core.dataset.Dataset): raise NotImplementedError( - "Xarray datasets are not supported. " "You can only use xarray dataarrays." + "Xarray datasets are not supported. You can only use xarray dataarrays." ) return data[current_key] diff --git a/src/pynxtools/nexus/nexus.py b/src/pynxtools/nexus/nexus.py index fec2eda66..ee106c7d6 100644 --- a/src/pynxtools/nexus/nexus.py +++ b/src/pynxtools/nexus/nexus.py @@ -439,7 +439,7 @@ def process_node(hdf_node, hdf_path, parser, logger, doc=True): if len(hdf_node.shape) <= 1 else str(decode_if_string(hdf_node[0])).split("\n") ) - logger.debug(f'value: {val[0]} {"..." if len(val) > 1 else ""}') + logger.debug(f"value: {val[0]} {'...' if len(val) > 1 else ''}") else: logger.debug( f"===== GROUP (/{hdf_path} " @@ -460,7 +460,7 @@ def process_node(hdf_node, hdf_path, parser, logger, doc=True): for key, value in hdf_node.attrs.items(): logger.debug(f"===== ATTRS (/{hdf_path}@{key})") val = str(decode_if_string(value)).split("\n") - logger.debug(f'value: {val[0]} {"..." if len(val) > 1 else ""}') + logger.debug(f"value: {val[0]} {'...' if len(val) > 1 else ''}") (req_str, nxdef, nxdl_path) = get_nxdl_doc(hdf_info, logger, doc, attr=key) if ( parser is not None diff --git a/src/pynxtools/testing/nexus_conversion.py b/src/pynxtools/testing/nexus_conversion.py index ea33ac946..ffe0e98f2 100644 --- a/src/pynxtools/testing/nexus_conversion.py +++ b/src/pynxtools/testing/nexus_conversion.py @@ -103,9 +103,9 @@ def convert_to_nexus( """ Test the example data for the reader plugin. 
""" - assert hasattr( - self.reader, "supported_nxdls" - ), f"Reader{self.reader} must have supported_nxdls attribute" + assert hasattr(self.reader, "supported_nxdls"), ( + f"Reader{self.reader} must have supported_nxdls attribute" + ) assert callable(self.reader.read), f"Reader{self.reader} must have read method" if isinstance(self.files_or_dir, (list, tuple)): diff --git a/src/pynxtools/testing/nomad_example.py b/src/pynxtools/testing/nomad_example.py index 59ae61998..9dd23f7e8 100644 --- a/src/pynxtools/testing/nomad_example.py +++ b/src/pynxtools/testing/nomad_example.py @@ -124,6 +124,6 @@ def example_upload_entry_point_valid( os.path.abspath(os.path.join(dirpath, filename)) ) - assert ( - sorted(real_upload_files) == sorted(expected_upload_files) - ), f"Uploaded files {real_upload_files} do not match the expected files: {expected_upload_files}" + assert sorted(real_upload_files) == sorted(expected_upload_files), ( + f"Uploaded files {real_upload_files} do not match the expected files: {expected_upload_files}" + ) diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index b8fed848d..5e1a99a83 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -172,13 +172,13 @@ def fixture_filled_test_data(template, tmp_path): template["/ENTRY[my_entry]/program_name"] = "Testing program" template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/type"] = "2nd type" template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value"] = ( - "2022-01-22T12" ":14:12.05018+00:00" + "2022-01-22T12:14:12.05018+00:00" ) template["/ENTRY[my_entry]/required_group/description"] = "An example description" template["/ENTRY[my_entry]/required_group2/description"] = "An example description" template["/ENTRY[my_entry]/does/not/exist"] = "random" template["/ENTRY[my_entry]/links/ext_link"] = { - "link": f"{tmp_path}/" f"xarray_saved_small_cali" f"bration.h5:/axes/ax3" + "link": f"{tmp_path}/xarray_saved_small_calibration.h5:/axes/ax3" } return template diff --git a/tests/nexus/test_nexus.py b/tests/nexus/test_nexus.py index 6656ee666..5965f5b73 100644 --- a/tests/nexus/test_nexus.py +++ b/tests/nexus/test_nexus.py @@ -124,12 +124,12 @@ def test_decode_if_string(string_obj, decode, expected): # Handle np.ndarray outputs if isinstance(expected, np.ndarray): - assert isinstance( - result, np.ndarray - ), f"Expected ndarray, but got {type(result)}" - assert ( - result == expected - ).all(), f"Failed for {string_obj} with decode={decode}" + assert isinstance(result, np.ndarray), ( + f"Expected ndarray, but got {type(result)}" + ) + assert (result == expected).all(), ( + f"Failed for {string_obj} with decode={decode}" + ) # Handle list outputs elif isinstance(expected, list): assert isinstance(result, list), f"Expected list, but got {type(result)}" From ed34b3df4a315b981ae383d8aa38b911eea7d2cf Mon Sep 17 00:00:00 2001 From: sanbrock Date: Fri, 14 Feb 2025 13:28:55 +0100 Subject: [PATCH 28/31] ruffing --- dev-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index fcdb87d12..168170419 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -181,7 +181,7 @@ regex==2024.11.6 # via mkdocs-material requests==2.32.3 # via mkdocs-material -ruff==0.8.2 +ruff==0.9.6 # via pynxtools (pyproject.toml) scipy==1.14.1 # via ase From 6073efff0710c9cc20649ea50072a2fdcbbaad00 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Fri, 14 Feb 2025 13:58:51 +0100 Subject: [PATCH 29/31] linting --- 
src/pynxtools/nomad/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pynxtools/nomad/utils.py b/src/pynxtools/nomad/utils.py index 3e25da7c5..2174d7c47 100644 --- a/src/pynxtools/nomad/utils.py +++ b/src/pynxtools/nomad/utils.py @@ -16,7 +16,7 @@ # limitations under the License. # -from typing import Optional +from typing import Dict, Optional import numpy as np @@ -93,7 +93,7 @@ def get_quantity_base_name(quantity_name): ) -__FIELD_STATISTICS = { +__FIELD_STATISTICS: Dict[str, list] = { "suffix": ["__mean", "__std", "__min", "__max", "__size", "__ndim"], "function": [np.mean, np.std, np.min, np.max, np.size, np.ndim], "type": [np.float64, np.float64, None, None, np.int32, np.int32], From 8879fd9806d5d79921b1614a3d30a59f6422469f Mon Sep 17 00:00:00 2001 From: sanbrock Date: Fri, 14 Feb 2025 16:07:45 +0100 Subject: [PATCH 30/31] fix for tests --- tests/nomad/test_parsing.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/nomad/test_parsing.py b/tests/nomad/test_parsing.py index b1dda4bb3..e050448a7 100644 --- a/tests/nomad/test_parsing.py +++ b/tests/nomad/test_parsing.py @@ -72,12 +72,10 @@ def test_nexus_example(): assert data.energies__field.check("eV") # manual name resolution assert data.AXISNAME__field["angles__field"] is not None - assert ( - data.AXISNAME__field["angles__field"].attributes["nx_data_max"] - == 2.168025463513032 - ) + assert data.AXISNAME__max["angles__max"].value == 2.168025463513032 assert (1 * data.AXISNAME__field["angles__field"].unit).check("1/Å") assert (1 * data.AXISNAME__field["delays__field"].unit).check("fs") + assert data.___axes == "['angles', 'energies', 'delays']" def test_same_name_field_and_group(): From 0ff536fde76309c7d3626c6fa63537d77de9fe84 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Fri, 14 Feb 2025 16:31:12 +0100 Subject: [PATCH 31/31] adjust to nomad's python requirement --- .github/workflows/pytest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index b69d94773..0bd4e1fce 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -34,7 +34,7 @@ jobs: curl -LsSf https://astral.sh/uv/install.sh | sh uv pip install coverage coveralls - name: Install nomad - if: "${{ matrix.python_version != '3.8'}}" + if: "${{ matrix.python_version != '3.8' && matrix.python_version != '3.9'}}" run: | uv pip install nomad-lab[infrastructure]@git+https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR.git - name: Install pynx